| // Copyright 2017 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // The identify_license program tries to identify the license type of an |
| // unknown license. The file containing the license text is specified on the |
| // command line. Multiple license files can be analyzed with a single command. |
| // The type of the license is returned along with the confidence level of the |
| // match. The confidence level is between 0.0 and 1.0, with 1.0 indicating an |
| // exact match and 0.0 indicating a complete mismatch. The results are sorted |
| // by confidence level. |
| // |
| // $ identify_license <LICENSE_OR_DIRECTORY> <LICENSE_OR_DIRECTORY> ... |
| // LICENSE2: MIT (confidence: 0.987) |
| // LICENSE1: BSD-2-Clause (confidence: 0.833) |
| package main |
| |
| import ( |
| "context" |
| "encoding/json" |
| "flag" |
| "fmt" |
| "strings" |
| |
| //"google3/file/base/go/contrib/walk/walk" |
| //"google3/file/base/go/file" |
| "io/fs" |
| "io/ioutil" |
| "log" |
| "os" |
| "path/filepath" |
| "regexp" |
| "sort" |
| "time" |
| |
| classifier "github.com/google/licenseclassifier/v2" |
| "github.com/google/licenseclassifier/v2/tools/identify_license/backend" |
| "github.com/google/licenseclassifier/v2/tools/identify_license/results" |
| ) |
| |
| var ( |
| headers = flag.Bool("headers", false, "match license headers") |
| jsonFname = flag.String("json", "", "filename to write JSON output to.") |
| includeText = flag.Bool("include_text", false, "include the license text in the JSON output") |
| numTasks = flag.Int("tasks", 1000, "the number of license scanning tasks running concurrently") |
| timeout = flag.Duration("timeout", 24*time.Hour, "timeout before giving up on classifying a file.") |
| tracePhases = flag.String("trace_phases", "", "comma-separated list of phases of the license classifier to trace") |
| traceLicenses = flag.String("trace_licenses", "", "comma-separated list of licenses for the license classifier to trace") |
| ignorePaths = flag.String("ignore_paths_re", "", "comma-separated list of regular expressions that match file paths to ignore") |
| ) |
| |
| // expandFiles recursively returns a list of files stored in a list of |
| // directories. If an input is not a directory, it is added to the output list. |
| func expandFiles(ctx context.Context, paths []string) ([]string, error) { |
| var finalPaths []string |
| |
| ip, err := parseIgnorePaths() |
| if err != nil { |
| return nil, fmt.Errorf("could not parse ignore paths: %v", err) |
| } |
| |
| handleFile := func(path string) { |
| if shouldIgnore(ip, path) { |
| return |
| } |
| finalPaths = append(finalPaths, path) |
| } |
| |
| for _, p := range paths { |
| p, err := filepath.Abs(p) |
| if err != nil { |
| return nil, err |
| } |
| |
| err = filepath.Walk(p, func(path string, info os.FileInfo, err error) error { |
| if err != nil { |
| return err |
| } |
| if info.IsDir() { |
| if shouldIgnore(ip, info.Name()) { |
| return fs.SkipDir |
| } |
| return nil // walk the directory |
| } |
| handleFile(path) |
| return nil |
| }) |
| if err != nil { |
| return nil, err |
| } |
| } |
| return finalPaths, nil |
| } |
| |
| func shouldIgnore(ignorePaths []*regexp.Regexp, path string) bool { |
| for _, r := range ignorePaths { |
| if exactRegexMatch(r, path) { |
| return true |
| } |
| } |
| return false |
| } |
| |
| func exactRegexMatch(r *regexp.Regexp, s string) bool { |
| m := r.FindStringIndex(s) |
| if m == nil { |
| return false |
| } |
| return (m[0] == 0) && (m[1] == len(s)) |
| } |
| |
| func parseIgnorePaths() (out []*regexp.Regexp, err error) { |
| for _, p := range strings.Split(*ignorePaths, ",") { |
| r, err := regexp.Compile(p) |
| if err != nil { |
| return nil, err |
| } |
| out = append(out, r) |
| } |
| return out, nil |
| } |
| |
| // outputJSON writes the output formatted as JSON to a file. |
| func outputJSON(filename *string, res results.LicenseTypes, includeText bool) error { |
| d, err := results.NewJSONResult(res, includeText) |
| if err != nil { |
| return err |
| } |
| fc, err := json.MarshalIndent(d, "", " ") |
| if err != nil { |
| return err |
| } |
| return ioutil.WriteFile(*filename, fc, 0644) |
| } |
| |
| func init() { |
| flag.Usage = func() { |
| fmt.Fprintf(os.Stderr, `Usage: %s <licensefile> ... |
| |
| Identify an unknown license. |
| |
| Options: |
| `, filepath.Base(os.Args[0])) |
| flag.PrintDefaults() |
| } |
| } |
| |
| func main() { |
| flag.Parse() |
| |
| be, err := backend.New() |
| if err != nil { |
| log.Fatalf("cannot create license classifier: %v", err) |
| } |
| |
| paths, err := expandFiles(context.Background(), flag.Args()) |
| defer be.Close() |
| be.SetTraceConfiguration( |
| &classifier.TraceConfiguration{ |
| TracePhases: *tracePhases, |
| TraceLicenses: *traceLicenses, |
| }) |
| |
| ctx, cancel := context.WithTimeout(context.Background(), *timeout) |
| defer cancel() |
| if errs := be.ClassifyLicensesWithContext(ctx, *numTasks, paths, *headers); errs != nil { |
| be.Close() |
| for _, err := range errs { |
| log.Printf("classify license failed: %v", err) |
| } |
| log.Fatal("cannot classify licenses") |
| } |
| |
| results := be.GetResults() |
| if len(results) == 0 { |
| log.Fatal("Couldn't classify license(s)") |
| } |
| |
| sort.Sort(results) |
| for _, r := range results { |
| name := r.Name |
| if r.MatchType != "License" && r.MatchType != "Header" { |
| name = fmt.Sprintf("%s:%s", r.MatchType, r.Name) |
| } |
| fmt.Printf("%s %s (variant: %v, confidence: %v, start: %v, end: %v)\n", |
| r.Filename, name, r.Variant, r.Confidence, r.StartLine, r.EndLine) |
| } |
| if len(*jsonFname) > 0 { |
| err = outputJSON(jsonFname, results, *includeText) |
| if err != nil { |
| log.Fatalf("Couldn't write JSON output to file %s: %v", *jsonFname, err) |
| } |
| } |
| } |