blob: f0cbe6ec2727290ca1c9925c70bac79f0c90b02b [file] [log] [blame]
// Copyright 2022 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package license
import (
"fmt"
"io/ioutil"
"path/filepath"
"regexp"
"strings"
"go.fuchsia.dev/fuchsia/tools/check-licenses/file"
)
// Pattern wraps a compiled regular expression that is used to locate license
// text in source files and LICENSE files throughout the repository, together
// with metadata about the license and a record of what it has matched so far.
type Pattern struct {
	// Name is the file name of the license pattern file.
	Name string

	// Type is the kind of license this pattern recognizes (BSD, MIT, ...).
	// It comes from the name of the folder that contains the pattern file.
	Type string

	// Category is the group the license falls into:
	// Approved, Restricted, Allowlist_Only.
	// It comes from the name of the grandparent folder of the pattern file.
	Category string

	// AllowList is a slice of regex patterns that are matched against
	// project paths. The license pattern may only match projects listed here.
	AllowList []string

	// Matches collects pointers to every data fragment this pattern matched.
	// Some templates in the result package rely on it to group license texts
	// by pattern type when producing the NOTICE file.
	Matches []*file.FileData

	// Re is the compiled regex pattern. It is exported so that the result
	// package can save the pattern to disk at the end.
	Re *regexp.Regexp

	// PreviousMatches and PreviousMismatches remember the outcome of earlier
	// searches (successful and failed, respectively), keyed by filedata hash.
	PreviousMatches, PreviousMismatches map[string]bool

	// isHeader, when set, makes Search replace the contents of a matching
	// FileData with just the matched text.
	isHeader bool
}
// Order is a slice of patterns that satisfies sort.Interface, ordering the
// patterns by ascending Name.
type Order []*Pattern

// Len reports how many patterns the slice holds.
func (o Order) Len() int {
	return len(o)
}

// Swap exchanges the patterns stored at positions i and j.
func (o Order) Swap(i, j int) {
	o[i], o[j] = o[j], o[i]
}

// Less reports whether the Name of the pattern at position i sorts before
// the Name of the pattern at position j.
func (o Order) Less(i, j int) bool {
	left, right := o[i], o[j]
	return left.Name < right.Name
}
// NewPattern reads the license pattern file at path and returns a Pattern
// holding the regex compiled from it.
//
// The file content is preprocessed before it is compiled:
//   - runs of whitespace are collapsed, and every remaining gap is replaced
//     by a sub-expression that tolerates whitespace and comment characters;
//   - every four-digit number surrounded by non-digits (e.g. a copyright
//     year) is replaced by a sub-expression that accepts any four digits.
//
// Name, Type and Category are taken from the file name, the parent folder
// name and the grandparent folder name of path, respectively. Patterns in
// the "approved" and "notice" categories get an allowlist that matches every
// project; patterns in any other category get the regexes stored under their
// file name in AllowListPatternMap, if any.
//
// An error is returned if the file cannot be read, or if the preprocessed
// content is not a valid regular expression (the error is then prefixed with
// path).
func NewPattern(path string) (*Pattern, error) {
	content, err := ioutil.ReadFile(path)
	if err != nil {
		return nil, err
	}

	// Collapse every run of whitespace into a single space.
	regex := strings.Join(strings.Fields(string(content)), " ")

	// Let each of those spaces stand for any mix of whitespace, the comment
	// characters \ # * / and the two-character sequence "^L".
	regex = strings.ReplaceAll(regex, ` `, `(?:[\s\\#\*\/]|\^L)*`)

	// Convert date strings to a regex that supports any date.
	//
	// ReplaceAllString only replaces non-overlapping matches, and each match
	// consumes the non-digit on both sides of the year. In text such as
	// "2019-2020" the "-" is used up by the first year, so the second year
	// cannot match in the same pass. A second pass picks up the years that
	// were skipped; the replacement text contains no four-digit run, so
	// nothing is rewritten twice.
	dates := regexp.MustCompile(`(\D)[\d]{4}(\D)`)
	for pass := 0; pass < 2; pass++ {
		regex = dates.ReplaceAllString(regex, `$1[\d]{4}$2`)
	}

	re, err := regexp.Compile(regex)
	if err != nil {
		return nil, fmt.Errorf("%s: %w", path, err)
	}

	name := filepath.Base(path)

	// Retrieve the license type (e.g. MIT, BSD) from the filepath.
	parentDir := filepath.Dir(path)
	licType := filepath.Base(parentDir)

	// Retrieve the license category (e.g. Approved, Restricted) from the filepath.
	licCategory := filepath.Base(filepath.Dir(parentDir))

	allowlist := make([]string, 0)
	switch licCategory {
	case "approved", "notice":
		allowlist = append(allowlist, ".*")
	default:
		// allowlist_only and restricted
		// TODO: make restricted licenses un-allowlist-able.
		if regexes, ok := AllowListPatternMap[name]; ok {
			allowlist = append(allowlist, regexes...)
		}
	}

	return &Pattern{
		Name:               name,
		Type:               licType,
		Category:           licCategory,
		AllowList:          allowlist,
		Matches:            make([]*file.FileData, 0),
		PreviousMatches:    make(map[string]bool),
		PreviousMismatches: make(map[string]bool),
		Re:                 re,
	}, nil
}
// Search reports whether the data held by d contains text matching this
// Pattern's regex.
//
// On a match, d is appended to p.Matches and d.LicenseType is set to p.Type.
// If the pattern is a header pattern, the contents of d are additionally
// replaced with the matched text. Results are remembered by filedata hash so
// that repeated searches can skip the regex.
func (p *Pattern) Search(d *file.FileData) bool {
	// Empty data counts as a match for the special "_empty" pattern.
	if len(d.Data) == 0 && p.Name == "_empty" {
		return true
	}

	// Reuse the result of an earlier search of the same data segment when
	// possible; this should be faster than running the regex again.
	// Header patterns never take the cached-match shortcut.
	if _, seen := p.PreviousMatches[d.Hash()]; seen && !p.isHeader {
		d.LicenseType = p.Type
		p.Matches = append(p.Matches, d)
		return true
	}
	if _, seen := p.PreviousMismatches[d.Hash()]; seen {
		return false
	}

	found := p.Re.Find(d.Data)
	if found == nil {
		p.PreviousMismatches[d.Hash()] = true
		return false
	}

	// For a source file with the copyright header info at the top, make the
	// filedata object hold the copyright info instead of the full file
	// contents.
	if p.isHeader {
		d.SetData(found)
	}
	p.Matches = append(p.Matches, d)
	d.LicenseType = p.Type
	// The hash is taken after any SetData call above, as in the lookup order
	// the rest of this method relies on.
	p.PreviousMatches[d.Hash()] = true
	return true
}