blob: 2cfcf17c9effe66238cbe0bdfdaeff7cc8369321 [file] [log] [blame]
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package checklicenses
import (
"fmt"
"io/ioutil"
"path/filepath"
"regexp"
"strings"
"sync"
)
// License contains a searchable regex pattern for finding license text
// in source files and LICENSE files across the repository.
type License struct {
pattern *regexp.Regexp
matches map[string]*Match
Category string `json:"category"`
ValidType bool `json:"valid license"`
AllowedDirs []string
BadLicenseUsage []string
sync.Mutex
}
// licenseByPattern implements sort.Interface for []*License based on the length of the Pattern field.
// Licenses with a "fuchsia" category are sorted above all other licenses.
type licenseByPattern []*License
func (a licenseByPattern) Len() int { return len(a) }
func (a licenseByPattern) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a licenseByPattern) Less(i, j int) bool {
l := strings.Contains(a[i].Category, "fuchsia")
r := strings.Contains(a[j].Category, "fuchsia")
if l != r {
return l
}
return len(a[i].pattern.String()) < len(a[j].pattern.String())
}
// NewLicense returns a License object with the regex pattern loaded from the .lic folder.
// Some preprocessing is done to the pattern (e.g. removing code comment characters).
func NewLicense(path string, config *Config) (*License, error) {
bytes, err := ioutil.ReadFile(path)
if err != nil {
return nil, err
}
regex := string(bytes)
// Update regex to ignore multiple white spaces, newlines, comments.
// But first, trim whitespace away so we don't include unnecessary
// comment syntax.
regex = strings.Trim(regex, "\n ")
regex = strings.ReplaceAll(regex, "\n", `([\s\\#\*\/]|\^L)*`)
regex = strings.ReplaceAll(regex, " ", `([\s\\#\*\/]|\^L)*`)
re, err := regexp.Compile(regex)
if err != nil {
return nil, fmt.Errorf("%s: %w", path, err)
}
isValid := !(contains(config.ProhibitedLicenseTypes, filepath.Base(path)) || contains(config.ProhibitedLicenseTypes, filepath.Dir(path)))
return &License{
pattern: re,
Category: filepath.Base(path),
ValidType: isValid,
matches: map[string]*Match{},
AllowedDirs: config.LicenseAllowList[filepath.Base(path)],
BadLicenseUsage: []string{},
}, nil
}
// NOTICE files currently need to be processed differently compared to regular single-license files.
// This custom license type allows us to collect and present them properly in the final output file.
func NewCustomLicense(name string) *License {
regex := "(?s)(?P<text>.*)"
re, _ := regexp.Compile(regex)
return &License{
pattern: re,
Category: "custom",
ValidType: true,
matches: map[string]*Match{},
BadLicenseUsage: []string{},
}
}
func (l *License) CheckAllowList(path string) bool {
allowed := true
if len(l.AllowedDirs) > 0 && !contains(l.AllowedDirs, path) {
allowed = false
if l.ValidType {
l.BadLicenseUsage = append(l.BadLicenseUsage, path)
}
}
return allowed
}
// Search the given data slice for text that matches this License pattern.
func (l *License) Search(data []byte, file string) (bool, *Match) {
if m := l.pattern.FindSubmatch(data); m != nil {
l.CheckAllowList(file)
match := NewMatch(m, file, l.pattern)
l.Lock()
key := hash(match.Text)
if l.matches[key] == nil {
l.matches[key] = match
} else {
l.matches[key].merge(match)
}
l.Unlock()
return true, match
}
return false, nil
}
// Many license texts differ by whitespaces, and are otherwise identical.
// This hash function lets us tweak how we dedup the license texts.
func hash(s string) string {
// For the sake of hashing, ignore code comment characters ('#', '//')
s = strings.ReplaceAll(s, "#", " ")
s = strings.ReplaceAll(s, "//", " ")
// Ignore bullet point markings ('*', '-')
s = strings.ReplaceAll(s, "*", " ")
s = strings.ReplaceAll(s, "-", " ")
return strings.Join(strings.Fields(s), " ")
}