| // Copyright 2017 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // Package serializer normalizes the license text and calculates the hash |
| // values for all substrings in the license. It then outputs the normalized |
| // text and hashes to disk in a compressed archive. |
| package serializer |
| |
| import ( |
| "archive/tar" |
| "bytes" |
| "compress/gzip" |
| "io" |
| "log" |
| "path/filepath" |
| "strings" |
| |
| "github.com/google/licenseclassifier" |
| "github.com/google/licenseclassifier/stringclassifier/searchset" |
| ) |
| |
| // ArchiveLicenses takes all of the known license texts, normalizes them, then |
| // calculates the hash values of all substrings. The resulting normalized text |
| // and hashed substring values are then serialized into an archive file. |
| func ArchiveLicenses(licenses []string, w io.Writer) error { |
| gw := gzip.NewWriter(w) |
| defer gw.Close() |
| |
| tw := tar.NewWriter(gw) |
| for _, license := range licenses { |
| // All license files have a ".txt" extension. |
| ext := filepath.Ext(license) |
| if ext != ".txt" { |
| continue |
| } |
| |
| contents, err := licenseclassifier.ReadLicenseFile(license) |
| if err != nil { |
| return err |
| } |
| |
| str := licenseclassifier.TrimExtraneousTrailingText(string(contents)) |
| for _, n := range licenseclassifier.Normalizers { |
| str = n(str) |
| } |
| |
| baseName := strings.TrimSuffix(filepath.Base(license), ext) |
| |
| // Serialize the normalized license text. |
| log.Printf("Serializing %q", baseName) |
| hdr := &tar.Header{ |
| Name: filepath.Base(license), |
| Mode: 0644, |
| Size: int64(len(str)), |
| } |
| |
| if err := tw.WriteHeader(hdr); err != nil { |
| return err |
| } |
| if _, err := tw.Write([]byte(str)); err != nil { |
| return err |
| } |
| |
| // Calculate the substrings' checksums |
| set := searchset.New(str, searchset.DefaultGranularity) |
| |
| var s bytes.Buffer |
| if err := set.Serialize(&s); err != nil { |
| return err |
| } |
| |
| // Serialize the checksums. |
| hdr = &tar.Header{ |
| Name: baseName + ".hash", |
| Mode: 0644, |
| Size: int64(s.Len()), |
| } |
| |
| if err := tw.WriteHeader(hdr); err != nil { |
| return err |
| } |
| if _, err := tw.Write(s.Bytes()); err != nil { |
| return err |
| } |
| } |
| |
| return tw.Close() |
| } |