build/licenses/python/spdx_writer.py - fuchsia - Git at Google

 #!/usr/bin/env fuchsia-vendored-python
 # Copyright 2023 The Fuchsia Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 """Generates licenses SPDX from GN metadata."""

 import json
 import hashlib
 import dataclasses
 from file_access import FileAccess
 from gn_label import GnLabel
 from typing import Callable, Dict, List, Any, Tuple, TypeAlias
 import os

 # Alias for a dictionary with arbitrary key and value types.
 AnyDict: TypeAlias = Dict[Any, Any]


 @dataclasses.dataclass(frozen=False)
 class SpdxWriter:
     """Builds an SPDX 2.3 JSON document of packages and their license texts.

     Instances are normally obtained through `SpdxWriter.create()`, which
     supplies the fixed document and root-package ids and populates
     `json_document`. Packages are then added with `add_license()` and the
     result is emitted with `save()` or `save_to_string()`.
     """

     # Source of license texts; only its `read_text(label)` method is used.
     file_access: "FileAccess"
     # SPDXID of the document itself.
     document_id: str
     # SPDXID and name of the root package that CONTAINS every added package.
     root_package_id: str
     root_package_name: str

     # The top-level JSON object that the save* methods serialize.
     json_document: Dict[str, Any] = dataclasses.field(default_factory=dict)

     # Once _init_json is called, the following collections are referenced
     # within the json_document, and further changes to them will be reflected
     # in the json outputted by the save* methods.

     document_describes: List[str] = dataclasses.field(default_factory=list)
     packages: List[Dict[str, Any]] = dataclasses.field(default_factory=list)
     relationships: List[Dict[str, Any]] = dataclasses.field(
         default_factory=list
     )
     extracted_licenses: List[Dict[str, Any]] = dataclasses.field(
         default_factory=list
     )

     # Indexes of what add_license has already emitted, keyed by license ref
     # and by package id respectively. They are not part of the json output.
     license_json_by_ref: Dict[str, Dict[str, Any]] = dataclasses.field(
         default_factory=dict
     )
     package_json_by_ids: Dict[str, Dict[str, Any]] = dataclasses.field(
         default_factory=dict
     )

     @staticmethod
     def create(
         root_package_name: str, file_access: "FileAccess"
     ) -> "SpdxWriter":
         """Returns a writer whose document already contains the root package.

         Args:
             root_package_name: Name used for both the document and its root
                 package.
             file_access: Object used to read license file contents.
         """
         writer = SpdxWriter(
             file_access=file_access,
             document_id="SPDXRef-DOCUMENT",
             root_package_id="SPDXRef-Package-Root",
             root_package_name=root_package_name,
         )
         writer._init_json()
         return writer

     def _init_json(self) -> None:
         """Fills json_document with the document header and root package."""
         self.json_document.update(
             {
                 "spdxVersion": "SPDX-2.3",
                 "SPDXID": self.document_id,
                 "name": self.root_package_name,
                 "documentNamespace": "",
                 "creationInfo": {
                     "creators": [f"Tool: {os.path.basename(__file__)}"],
                 },
                 "dataLicense": "CC0-1.0",
                 # The list objects themselves are stored (not copies), so
                 # later appends and sorts show up in the serialized output.
                 "documentDescribes": self.document_describes,
                 "packages": self.packages,
                 "relationships": self.relationships,
                 "hasExtractedLicensingInfos": self.extracted_licenses,
             }
         )

         self.document_describes.append(self.root_package_id)
         self.packages.append(
             {
                 "SPDXID": self.root_package_id,
                 "name": self.root_package_name,
             }
         )

     def add_license(
         self,
         public_package_name: str,
         license_labels: "Tuple[GnLabel, ...]",
         collection_hint: str,
     ) -> None:
         """Adds a package and the texts of its licenses to the document.

         The call does nothing when a package with the same name and license
         paths was added before. A license text is read through `file_access`
         only the first time its (package name, license path) pair is seen.

         Args:
             public_package_name: Package name as it appears in the output.
             license_labels: Labels of the package's license files.
             collection_hint: Stored under the "_hint" key of each newly
                 extracted license; omitted when empty.
         """
         package_id = self._spdx_package_id(public_package_name, license_labels)

         if package_id in self.package_json_by_ids:
             # since the package_id is derived by the package name and license
             # paths, we can assume that if we already added this id, no need
             # to add new package or license elements.
             return

         license_refs = []
         for license_label in license_labels:
             license_ref = self._spdx_license_ref(
                 public_package_name, license_label
             )
             license_refs.append(license_ref)

             if license_ref in self.license_json_by_ref:
                 continue

             # Leading and trailing whitespace is removed from the text.
             license_text = self.file_access.read_text(license_label).strip()
             extracted_license = {
                 "name": public_package_name,
                 "licenseId": license_ref,
                 "extractedText": license_text,
                 "crossRefs": [
                     {
                         "url": license_label.code_search_url(),
                     }
                 ],
             }
             if collection_hint:
                 extracted_license["_hint"] = collection_hint
             self.license_json_by_ref[license_ref] = extracted_license
             self.extracted_licenses.append(extracted_license)

         package_json = {
             "SPDXID": package_id,
             "name": public_package_name,
             "licenseConcluded": " AND ".join(license_refs),
         }
         self.package_json_by_ids[package_id] = package_json
         self.document_describes.append(package_id)
         self.packages.append(package_json)
         self.relationships.append(
             {
                 "spdxElementId": self.root_package_id,
                 "relatedSpdxElement": package_id,
                 "relationshipType": "CONTAINS",
             }
         )

     def _sort_elements(self) -> None:
         """Sorts all output elements alphabetically, in place.

         This ensures consistent and developer-friendly output independent of
         input ordering. Licenses and packages are keyed by their lowercased
         name concatenated with their id.
         """
         self.extracted_licenses.sort(
             key=lambda x: x["name"].lower() + x["licenseId"]
         )
         self.packages.sort(key=lambda x: x["name"].lower() + x["SPDXID"])
         self.document_describes.sort()
         self.relationships.sort(
             key=lambda x: x["spdxElementId"] + x["relatedSpdxElement"]
         )

     def save(self, file_path: str) -> None:
         """Sorts the elements and writes the document as JSON to file_path."""
         self._sort_elements()
         # Explicit encoding so the result does not depend on the locale.
         with open(file_path, "w", encoding="utf-8") as f:
             json.dump(self.json_document, f, indent=4)

     def save_to_string(self) -> str:
         """Sorts the elements and returns the document as a JSON string."""
         self._sort_elements()
         return json.dumps(self.json_document, indent=4)

     @staticmethod
     def _md5_hex(*parts: str) -> str:
         """Returns the md5 hex digest of the utf-8 bytes of parts, in order."""
         # md5 only derives stable identifiers here. Declaring the use as
         # non-security keeps it available on FIPS-restricted Python builds
         # and does not change the digest.
         md5 = hashlib.md5(usedforsecurity=False)
         for part in parts:
             md5.update(part.encode("utf-8"))
         return md5.hexdigest()

     def _spdx_package_id(
         self, public_package_name: str, license_labels: "Tuple[GnLabel, ...]"
     ) -> str:
         """Derives a package SPDXID from the name and all license paths."""
         digest = self._md5_hex(
             public_package_name.strip(),
             *(str(ll.path_str) for ll in license_labels),
         )
         return f"SPDXRef-Package-{digest}"

     def _spdx_license_ref(
         self, public_package_name: str, license_label: "GnLabel"
     ) -> str:
         """Derives a license ref from the name and a single license path."""
         digest = self._md5_hex(
             public_package_name.strip(), str(license_label.path_str)
         )
         return f"LicenseRef-{digest}"
	#!/usr/bin/env fuchsia-vendored-python
	# Copyright 2023 The Fuchsia Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	"""Generates licenses SPDX from GN metadata."""

	import json
	import hashlib
	import dataclasses
	from file_access import FileAccess
	from gn_label import GnLabel
	from typing import Callable, Dict, List, Any, Tuple, TypeAlias
	import os

	# Alias for a dictionary with arbitrary key and value types.
	AnyDict: TypeAlias = Dict[Any, Any]


	@dataclasses.dataclass(frozen=False)
	class SpdxWriter:
	    """Builds an SPDX 2.3 JSON document of packages and their license texts.

	    Instances are normally obtained through `SpdxWriter.create()`, which
	    supplies the fixed document and root-package ids and populates
	    `json_document`. Packages are then added with `add_license()` and the
	    result is emitted with `save()` or `save_to_string()`.
	    """

	    # Source of license texts; only its `read_text(label)` method is used.
	    file_access: "FileAccess"
	    # SPDXID of the document itself.
	    document_id: str
	    # SPDXID and name of the root package that CONTAINS every added package.
	    root_package_id: str
	    root_package_name: str

	    # The top-level JSON object that the save* methods serialize.
	    json_document: Dict[str, Any] = dataclasses.field(default_factory=dict)

	    # Once _init_json is called, the following collections are referenced
	    # within the json_document, and further changes to them will be reflected
	    # in the json outputted by the save* methods.

	    document_describes: List[str] = dataclasses.field(default_factory=list)
	    packages: List[Dict[str, Any]] = dataclasses.field(default_factory=list)
	    relationships: List[Dict[str, Any]] = dataclasses.field(
	        default_factory=list
	    )
	    extracted_licenses: List[Dict[str, Any]] = dataclasses.field(
	        default_factory=list
	    )

	    # Indexes of what add_license has already emitted, keyed by license ref
	    # and by package id respectively. They are not part of the json output.
	    license_json_by_ref: Dict[str, Dict[str, Any]] = dataclasses.field(
	        default_factory=dict
	    )
	    package_json_by_ids: Dict[str, Dict[str, Any]] = dataclasses.field(
	        default_factory=dict
	    )

	    @staticmethod
	    def create(
	        root_package_name: str, file_access: "FileAccess"
	    ) -> "SpdxWriter":
	        """Returns a writer whose document already contains the root package.

	        Args:
	            root_package_name: Name used for both the document and its root
	                package.
	            file_access: Object used to read license file contents.
	        """
	        writer = SpdxWriter(
	            file_access=file_access,
	            document_id="SPDXRef-DOCUMENT",
	            root_package_id="SPDXRef-Package-Root",
	            root_package_name=root_package_name,
	        )
	        writer._init_json()
	        return writer

	    def _init_json(self) -> None:
	        """Fills json_document with the document header and root package."""
	        self.json_document.update(
	            {
	                "spdxVersion": "SPDX-2.3",
	                "SPDXID": self.document_id,
	                "name": self.root_package_name,
	                "documentNamespace": "",
	                "creationInfo": {
	                    "creators": [f"Tool: {os.path.basename(__file__)}"],
	                },
	                "dataLicense": "CC0-1.0",
	                # The list objects themselves are stored (not copies), so
	                # later appends and sorts show up in the serialized output.
	                "documentDescribes": self.document_describes,
	                "packages": self.packages,
	                "relationships": self.relationships,
	                "hasExtractedLicensingInfos": self.extracted_licenses,
	            }
	        )

	        self.document_describes.append(self.root_package_id)
	        self.packages.append(
	            {
	                "SPDXID": self.root_package_id,
	                "name": self.root_package_name,
	            }
	        )

	    def add_license(
	        self,
	        public_package_name: str,
	        license_labels: "Tuple[GnLabel, ...]",
	        collection_hint: str,
	    ) -> None:
	        """Adds a package and the texts of its licenses to the document.

	        The call does nothing when a package with the same name and license
	        paths was added before. A license text is read through `file_access`
	        only the first time its (package name, license path) pair is seen.

	        Args:
	            public_package_name: Package name as it appears in the output.
	            license_labels: Labels of the package's license files.
	            collection_hint: Stored under the "_hint" key of each newly
	                extracted license; omitted when empty.
	        """
	        package_id = self._spdx_package_id(public_package_name, license_labels)

	        if package_id in self.package_json_by_ids:
	            # since the package_id is derived by the package name and license
	            # paths, we can assume that if we already added this id, no need
	            # to add new package or license elements.
	            return

	        license_refs = []
	        for license_label in license_labels:
	            license_ref = self._spdx_license_ref(
	                public_package_name, license_label
	            )
	            license_refs.append(license_ref)

	            if license_ref in self.license_json_by_ref:
	                continue

	            # Leading and trailing whitespace is removed from the text.
	            license_text = self.file_access.read_text(license_label).strip()
	            extracted_license = {
	                "name": public_package_name,
	                "licenseId": license_ref,
	                "extractedText": license_text,
	                "crossRefs": [
	                    {
	                        "url": license_label.code_search_url(),
	                    }
	                ],
	            }
	            if collection_hint:
	                extracted_license["_hint"] = collection_hint
	            self.license_json_by_ref[license_ref] = extracted_license
	            self.extracted_licenses.append(extracted_license)

	        package_json = {
	            "SPDXID": package_id,
	            "name": public_package_name,
	            "licenseConcluded": " AND ".join(license_refs),
	        }
	        self.package_json_by_ids[package_id] = package_json
	        self.document_describes.append(package_id)
	        self.packages.append(package_json)
	        self.relationships.append(
	            {
	                "spdxElementId": self.root_package_id,
	                "relatedSpdxElement": package_id,
	                "relationshipType": "CONTAINS",
	            }
	        )

	    def _sort_elements(self) -> None:
	        """Sorts all output elements alphabetically, in place.

	        This ensures consistent and developer-friendly output independent of
	        input ordering. Licenses and packages are keyed by their lowercased
	        name concatenated with their id.
	        """
	        self.extracted_licenses.sort(
	            key=lambda x: x["name"].lower() + x["licenseId"]
	        )
	        self.packages.sort(key=lambda x: x["name"].lower() + x["SPDXID"])
	        self.document_describes.sort()
	        self.relationships.sort(
	            key=lambda x: x["spdxElementId"] + x["relatedSpdxElement"]
	        )

	    def save(self, file_path: str) -> None:
	        """Sorts the elements and writes the document as JSON to file_path."""
	        self._sort_elements()
	        # Explicit encoding so the result does not depend on the locale.
	        with open(file_path, "w", encoding="utf-8") as f:
	            json.dump(self.json_document, f, indent=4)

	    def save_to_string(self) -> str:
	        """Sorts the elements and returns the document as a JSON string."""
	        self._sort_elements()
	        return json.dumps(self.json_document, indent=4)

	    @staticmethod
	    def _md5_hex(*parts: str) -> str:
	        """Returns the md5 hex digest of the utf-8 bytes of parts, in order."""
	        # md5 only derives stable identifiers here. Declaring the use as
	        # non-security keeps it available on FIPS-restricted Python builds
	        # and does not change the digest.
	        md5 = hashlib.md5(usedforsecurity=False)
	        for part in parts:
	            md5.update(part.encode("utf-8"))
	        return md5.hexdigest()

	    def _spdx_package_id(
	        self, public_package_name: str, license_labels: "Tuple[GnLabel, ...]"
	    ) -> str:
	        """Derives a package SPDXID from the name and all license paths."""
	        digest = self._md5_hex(
	            public_package_name.strip(),
	            *(str(ll.path_str) for ll in license_labels),
	        )
	        return f"SPDXRef-Package-{digest}"

	    def _spdx_license_ref(
	        self, public_package_name: str, license_label: "GnLabel"
	    ) -> str:
	        """Derives a license ref from the name and a single license path."""
	        digest = self._md5_hex(
	            public_package_name.strip(), str(license_label.path_str)
	        )
	        return f"LicenseRef-{digest}"