blob: 21b4dc14cee7c8c56b2d03313bef92a731132a0f [file] [log] [blame]
#!/usr/bin/env fuchsia-vendored-python
# Copyright 2023 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Generates licenses SPDX from GN metadata."""
import json
import hashlib
import dataclasses
from file_access import FileAccess
from gn_label import GnLabel
from typing import Callable, Dict, List, Any, Tuple, TypeAlias
import os
AnyDict: TypeAlias = Dict[Any, Any]
@dataclasses.dataclass(frozen=False)
class SpdxWriter:
"SPDX json file writer"
file_access: FileAccess
document_id: str
root_package_id: str
root_package_name: str
json_document: Dict[str, Any] = dataclasses.field(default_factory=dict)
# Once _init_json is called, the following collections, are referenced
# within the json_document, and further changes to them will be reflected
# in the json outputted by the save* methods.
document_describes: List["str"] = dataclasses.field(default_factory=list)
packages: List[Dict[str, Any]] = dataclasses.field(default_factory=list)
relationships: List[Dict[str, Any]] = dataclasses.field(
default_factory=list
)
extracted_licenses: List[Dict[str, Any]] = dataclasses.field(
default_factory=list
)
license_json_by_ref: Dict[str, AnyDict] = dataclasses.field(
default_factory=dict
)
package_json_by_ids: Dict[str, AnyDict] = dataclasses.field(
default_factory=dict
)
@staticmethod
def create(root_package_name: str, file_access: FileAccess) -> "SpdxWriter":
writer = SpdxWriter(
file_access=file_access,
document_id="SPDXRef-DOCUMENT",
root_package_id="SPDXRef-Package-Root",
root_package_name=root_package_name,
)
writer._init_json()
return writer
def _init_json(self) -> None:
self.json_document.update(
{
"spdxVersion": "SPDX-2.3",
"SPDXID": self.document_id,
"name": self.root_package_name,
"documentNamespace": "",
"creationInfo": {
"creators": [f"Tool: {os.path.basename(__file__)}"],
},
"dataLicense": "CC0-1.0",
"documentDescribes": self.document_describes,
"packages": self.packages,
"relationships": self.relationships,
"hasExtractedLicensingInfos": self.extracted_licenses,
}
)
self.document_describes.append(self.root_package_id)
self.packages.append(
{
"SPDXID": self.root_package_id,
"name": self.root_package_name,
}
)
def add_license(
self,
public_package_name: str,
license_labels: Tuple[GnLabel],
collection_hint: str,
) -> None:
package_id = self._spdx_package_id(public_package_name, license_labels)
if package_id in self.package_json_by_ids:
# since the package_id is derived by the package name and license paths,
# we can assume that if we already added this id, no need to add
# new package or license elements.
return
license_refs = []
for license_label in license_labels:
license_ref = self._spdx_license_ref(
public_package_name, license_label
)
license_refs.append(license_ref)
if license_ref not in self.license_json_by_ref:
license_text = self.file_access.read_text(license_label)
license_text = (
license_text.strip()
) # Remove trailing whitespace
extracted_license = {
"name": public_package_name,
"licenseId": license_ref,
"extractedText": license_text,
"crossRefs": [
{
"url": license_label.code_search_url(),
}
],
}
if collection_hint:
extracted_license["_hint"] = collection_hint
self.license_json_by_ref[license_ref] = extracted_license
self.extracted_licenses.append(extracted_license)
package_json = {
"SPDXID": package_id,
"name": public_package_name,
"licenseConcluded": " AND ".join(license_refs),
}
self.package_json_by_ids[package_id] = package_json
self.document_describes.append(package_id)
self.packages.append(package_json)
self.relationships.append(
{
"spdxElementId": self.root_package_id,
"relatedSpdxElement": package_id,
"relationshipType": "CONTAINS",
}
)
def _sort_elements(self) -> None:
"""Sorts all output elements alphabetically.
This ensures consistent and developer-friendly output independent on input ordering.
"""
self.extracted_licenses.sort(
key=lambda x: x["name"].lower() + x["licenseId"]
)
self.packages.sort(key=lambda x: x["name"].lower() + x["SPDXID"])
self.document_describes.sort()
self.relationships.sort(
key=lambda x: x["spdxElementId"] + x["relatedSpdxElement"]
)
def save(self, file_path: str) -> None:
self._sort_elements()
with open(file_path, "w") as f:
json.dump(self.json_document, f, indent=4)
def save_to_string(self) -> str:
self._sort_elements()
return json.dumps(self.json_document, indent=4)
def _spdx_package_id(
self, public_package_name: str, license_labels: Tuple[GnLabel]
) -> str:
md5 = hashlib.md5()
md5.update(public_package_name.strip().encode("utf-8"))
for ll in license_labels:
md5.update(str(ll.path_str).encode("utf-8"))
digest = md5.hexdigest()
return f"SPDXRef-Package-{digest}"
def _spdx_license_ref(
self, public_package_name: str, license_label: GnLabel
) -> str:
md5 = hashlib.md5()
md5.update(public_package_name.strip().encode("utf-8"))
md5.update(str(license_label.path_str).encode("utf-8"))
digest = md5.hexdigest()
return f"LicenseRef-{digest}"