blob: 1829f6ffb69df38e92be6ca482d4700bdfc145b3 [file] [log] [blame]
// Copyright 2020 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package classifier
import (
"bytes"
"errors"
"io/ioutil"
"log"
"os"
"path/filepath"
"sort"
"strings"
"testing"
"testing/iotest"
"github.com/davecgh/go-spew/spew"
"github.com/google/go-cmp/cmp"
"path"
)
// scenario is one parsed test scenario file: the licenses the file declares
// as expected, plus the content that is handed to the classifier.
type scenario struct {
// expected holds the entries of the comma-separated "EXPECTED:" line, as
// split by readScenario (not yet whitespace-trimmed).
expected []string
// data is the file content that follows the "EXPECTED:" line.
data []byte
}
// Shared settings for the classifier built by the tests in this file.
var (
	// defaultThreshold is the threshold value handed to NewClassifier.
	defaultThreshold = .8
	// baseLicenses is the directory path handed to LoadLicenses.
	baseLicenses = "assets"
)
// classifier builds a Classifier using defaultThreshold and loads the
// licenses found under baseLicenses into it. The classifier is returned
// together with whatever error LoadLicenses reported.
func classifier() (*Classifier, error) {
	cl := NewClassifier(defaultThreshold)
	loadErr := cl.LoadLicenses(path.Join(baseLicenses))
	return cl, loadErr
}
// getScenarioFilenames walks the ./scenarios directory tree and returns the
// path of every non-directory entry whose path does not end in "md". The
// first error reported by the walk is returned alongside whatever paths were
// collected up to that point.
func getScenarioFilenames() ([]string, error) {
	var found []string
	root := path.Join("./scenarios")

	// visit records a single walked entry, propagating walk errors unchanged.
	visit := func(p string, info os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case strings.HasSuffix(p, "md"), info.IsDir():
			// Suffix-"md" entries and directories are not scenarios.
			return nil
		}
		found = append(found, p)
		return nil
	}

	walkErr := filepath.Walk(root, visit)
	return found, walkErr
}
// TestMatchScenarios runs Classifier.Match over the content of every scenario
// file and verifies the detected license names against the file's EXPECTED
// list.
func TestMatchScenarios(t *testing.T) {
	cl, err := classifier()
	if err != nil {
		t.Fatalf("couldn't instantiate standard test classifier: %v", err)
	}

	paths, err := getScenarioFilenames()
	if err != nil {
		t.Fatalf("encountered error walking scenarios directory: %v", err)
	}

	for i := range paths {
		sc := readScenario(paths[i])
		checkMatches(t, cl.Match(sc.data).Matches, paths[i], sc.expected)
	}
}
// readScenario loads the scenario file at path and splits it into the list of
// expected licenses and the content to classify.
//
// A scenario consists of any number of description lines, which are ignored,
// followed by a line of the form
//
//	EXPECTED: A,B,C
//
// or a bare "EXPECTED:" line, where A,B,C is a comma-separated list of
// expected licenses. Everything after that line is the scenario data. The
// entries of the list are returned as written (not whitespace-trimmed).
//
// The process is terminated via log.Fatalf if the file cannot be read or does
// not contain an "EXPECTED:" marker. A file that ends right after the
// EXPECTED line yields empty data.
func readScenario(path string) *scenario {
	b, err := ioutil.ReadFile(path)
	if err != nil {
		log.Fatalf("Couldn't read scenario %s: %v", path, err)
	}

	// Drop the description; parts[1] starts right after the marker.
	parts := strings.SplitN(string(b), "EXPECTED:", 2)
	if len(parts) < 2 {
		log.Fatalf("Couldn't parse scenario %s: missing EXPECTED: line", path)
	}

	// Separate the rest of the EXPECTED line from the data that follows it.
	// header has a single element when the file has no linefeed after the
	// marker line.
	header := strings.SplitN(parts[1], "\n", 2)

	s := &scenario{expected: []string{}}
	// A whitespace-only remainder (e.g. a trailing "\r" from CRLF line
	// endings) means that no licenses are expected.
	if strings.TrimSpace(header[0]) != "" {
		s.expected = strings.Split(header[0], ",")
	}

	var rest string
	if len(header) == 2 {
		rest = header[1]
	}
	s.data = []byte(rest)
	return s
}
// TestContainsAndOverlaps checks the contains and overlaps helpers against
// pairs of matches described only by their line ranges. Every case has a
// distinct name so a failing subtest can be identified (and selected with
// -run) unambiguously.
func TestContainsAndOverlaps(t *testing.T) {
	tests := []struct {
		name     string
		a, b     *Match
		contains bool
		overlaps bool
	}{
		{
			name:     "no intersection",
			a:        &Match{StartLine: 1, EndLine: 3},
			b:        &Match{StartLine: 4, EndLine: 5},
			contains: false,
			overlaps: false,
		},
		{
			// b covers the first lines of a.
			name:     "overlap at start",
			a:        &Match{StartLine: 4, EndLine: 10},
			b:        &Match{StartLine: 1, EndLine: 5},
			contains: false,
			overlaps: true,
		},
		{
			// b covers the last lines of a.
			name:     "overlap at end",
			a:        &Match{StartLine: 1, EndLine: 10},
			b:        &Match{StartLine: 4, EndLine: 12},
			contains: false,
			overlaps: true,
		},
		{
			// b lies entirely inside a; this is expected to count as
			// containment only, not as an overlap.
			name:     "contains",
			a:        &Match{StartLine: 1, EndLine: 10},
			b:        &Match{StartLine: 4, EndLine: 7},
			contains: true,
			overlaps: false,
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			if got := contains(test.a, test.b); got != test.contains {
				t.Errorf("contains: got %v want %v", got, test.contains)
			}
			if got := overlaps(test.a, test.b); got != test.overlaps {
				t.Errorf("overlaps: got %v want %v", got, test.overlaps)
			}
		})
	}
}
// TestLicName enumerates license asset filenames together with the license
// name each one is meant to map to.
//
// NOTE(review): the subtest body is empty, so the expected values are never
// compared against anything and every case passes trivially. Confirm which
// function this table was meant to exercise and restore the assertion.
func TestLicName(t *testing.T) {
	cases := []struct {
		name     string
		expected string
	}{
		// The filename for a license
		{"GPL-2.0.txt", "GPL-2.0"},
		// The filename for a header reference to a license
		{"GPL-2.0.header.txt", "GPL-2.0"},
		// The filename for a variant header reference to a license
		{"GPL-2.0.header_a.txt", "GPL-2.0"},
		// The filename for a variant license body
		{"Apache-2.0_no_toc.txt", "Apache-2.0"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {})
	}
}
// TestMatchFrom exercises Classifier.MatchFrom in two ways: a reader that
// fails with iotest.ErrTimeout must have that error surfaced to the caller,
// and every scenario file read through a bytes.Reader must produce the
// licenses listed in its EXPECTED line.
func TestMatchFrom(t *testing.T) {
	tr := iotest.TimeoutReader(strings.NewReader("some data"))
	c, err := classifier()
	if err != nil {
		t.Fatalf("couldn't instantiate standard Google classifier: %v", err)
	}
	_, err = c.MatchFrom(tr)
	if !errors.Is(err, iotest.ErrTimeout) {
		t.Errorf("got %v want %v", err, iotest.ErrTimeout)
	}

	files, err := getScenarioFilenames()
	if err != nil {
		t.Fatalf("encountered error walking scenarios directory: %v", err)
	}
	for _, f := range files {
		s := readScenario(f)
		m, err := c.MatchFrom(bytes.NewReader(s.data))
		if err != nil {
			t.Errorf("MatchFrom(%q): unexpected error: %v", f, err)
			// The result is meaningless once an error was returned, so do
			// not pile a bogus match mismatch on top of it.
			continue
		}
		checkMatches(t, m.Matches, f, s.expected)
	}
}
// checkMatches diffs the resulting matches against the expected content and
// sets test results.
//
// m is the set of matches produced for scenario file f and e is the list of
// license names the scenario expects. Both sides are compared as sorted lists
// of names: match names are de-duplicated first, and expected names are
// whitespace-trimmed, so the order in which licenses are listed in the
// scenario file does not matter. e itself is not modified.
func checkMatches(t *testing.T, m Matches, f string, e []string) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	// Uniquify the licenses found.
	found := make(map[string]bool)
	for _, l := range m {
		found[l.Name] = true
	}
	names := make([]string, 0, len(found))
	for l := range found {
		names = append(names, l)
	}
	sort.Strings(names)

	// Normalize the expectations into a private, sorted copy so they line up
	// with the sorted names above.
	want := make([]string, 0, len(e))
	for _, w := range e {
		want = append(want, strings.TrimSpace(w))
	}
	sort.Strings(want)

	if len(names) != len(want) {
		t.Errorf("Match(%q) number matches: %v, want %v: %v", f, len(names), len(want), spew.Sdump(m))
		return
	}
	for i := range names {
		if got, w := names[i], want[i]; got != w {
			t.Errorf("Match(%q) = %q, want %q", f, got, w)
		}
	}
}
func TestLicenseName(t *testing.T) {
tests := []struct {
input string
want string
}{
{
input: "License/example/file.txt",
want: "example",
},
{
input: "License/example/a.txt",
want: "example",
},
{
input: "Header/example/header.txt",
want: "example",
},
{
input: "Header/example/a.txt",
want: "example",
},
}
for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
got := LicenseName(tt.input)
if diff := cmp.Diff(tt.want, got); diff != "" {
t.Errorf("Unexpected result; diff %v", diff)
}
})
}
}
// TestNormalize runs Classifier.Normalize over a table of inputs and diffs
// the result against the expected normalized text.
//
// The classifier is built once and shared by all subtests instead of being
// reloaded for every table entry.
func TestNormalize(t *testing.T) {
	tests := []struct {
		input string
		want  string
	}{
		{
			input: "Words With Extra Spaces are flattened out, preserving case",
			want:  "Words With Extra Spaces are flattened out preserving case",
		},
		{
			input: "",
			want:  "",
		},
		{
			input: " License ",
			want:  "License",
		},
		{
			// This tests that the line breaks in the input text are properly
			// preserved, which is important for visual diffing.
			input: `Preserving
line
breaks is important`,
			want: `Preserving
line
breaks is important`,
		},
		{
			// This tests that soft EOL functionality doesn't affect normalized output
			input: `This is a sentence looking construct. This is another sentence. What happens?`,
			want:  `This is a sentence looking construct This is another sentence What happens`,
		},
		{
			input: `header
........................ This is oddly formatted`,
			want: `header
This is oddly formatted`,
		},
		{
			input: `baseball basket-
ball football`,
			want: "baseball basketball\nfootball",
		},
	}

	// Loading the licenses does not depend on the table entry, so do it once.
	c, err := classifier()
	if err != nil {
		t.Fatalf("couldn't instantiate standard Google classifier: %v", err)
	}

	for _, tt := range tests {
		t.Run(tt.input, func(t *testing.T) {
			got := c.Normalize([]byte(tt.input))
			if diff := cmp.Diff(tt.want, string(got)); diff != "" {
				t.Errorf("Unexpected result; diff %v", diff)
			}
		})
	}
}