// Source blob: 65697d96ffbea1f9b74c41fd77542f9f2c07747d
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package serializer normalizes the license text and calculates the hash
// values for all substrings in the license. It then outputs the normalized
// text and hashes to disk in a compressed archive.
package serializer
import (
"archive/tar"
"bytes"
"compress/gzip"
"io"
"log"
"path/filepath"
"strings"
"github.com/google/licenseclassifier"
"github.com/google/licenseclassifier/stringclassifier/searchset"
)
// ArchiveLicenses normalizes each of the known license texts, calculates the
// checksums of its substrings, and writes both to w as a gzip-compressed tar
// archive.
//
// Entries in licenses whose extension is not ".txt" are skipped. For every
// remaining file two archive members are written: the normalized text under
// the file's base name, and the serialized search set under the same base
// name with the extension replaced by ".hash".
//
// The first error encountered while reading, serializing, or writing is
// returned. The gzip stream is closed on every return path, so w may have
// received a partial archive when a non-nil error is returned.
func ArchiveLicenses(licenses []string, w io.Writer) (err error) {
	gw := gzip.NewWriter(w)
	defer func() {
		// Closing the gzip writer flushes any pending compressed data, so a
		// failure here means the archive is incomplete and must be reported.
		// An earlier error takes precedence over the close error.
		if cerr := gw.Close(); err == nil {
			err = cerr
		}
	}()

	tw := tar.NewWriter(gw)
	for _, license := range licenses {
		// All license files have a ".txt" extension.
		ext := filepath.Ext(license)
		if ext != ".txt" {
			continue
		}

		contents, err := licenseclassifier.ReadLicenseFile(license)
		if err != nil {
			return err
		}

		// Apply every registered normalizer, in order, to the trimmed text.
		str := licenseclassifier.TrimExtraneousTrailingText(string(contents))
		for _, n := range licenseclassifier.Normalizers {
			str = n(str)
		}

		fileName := filepath.Base(license)
		baseName := strings.TrimSuffix(fileName, ext)

		// Serialize the normalized license text.
		log.Printf("Serializing %q", baseName)
		if err := writeArchiveEntry(tw, fileName, []byte(str)); err != nil {
			return err
		}

		// Calculate the substrings' checksums.
		set := searchset.New(str, searchset.DefaultGranularity)
		var s bytes.Buffer
		if err := set.Serialize(&s); err != nil {
			return err
		}

		// Serialize the checksums.
		if err := writeArchiveEntry(tw, baseName+".hash", s.Bytes()); err != nil {
			return err
		}
	}

	// The tar writer must be closed before the deferred gzip close runs so
	// that the tar trailer ends up inside the compressed stream.
	return tw.Close()
}

// writeArchiveEntry writes a single member called name, with mode 0644 and
// the given contents, to tw.
func writeArchiveEntry(tw *tar.Writer, name string, data []byte) error {
	hdr := &tar.Header{
		Name: name,
		Mode: 0644,
		Size: int64(len(data)),
	}
	if err := tw.WriteHeader(hdr); err != nil {
		return err
	}
	_, err := tw.Write(data)
	return err
}