| # Copyright 2022 The Fuchsia Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Types for classifying licenses""" |
| |
| from collections import defaultdict |
| import csv |
| import dataclasses |
| import json |
| from fuchsia.tools.licenses.common_types import * |
| from fuchsia.tools.licenses.spdx_types import * |
| from hashlib import md5 |
| from typing import Any, Callable, ClassVar, Dict, Pattern, List |
| |
# Work-around for b/258523163. We need to guarantee that at least one license is
# identified by https://github.com/google/licenseclassifier/tree/main/tools/identify_license
# or it exits with an error.
# TODO(b/258523163): Remove once fixed
b258523163_workaround = 'b258523163_workaround.txt'
| |
| |
@dataclasses.dataclass(frozen=True)
class IdentifiedSnippet:
    """Information about a single license snippet (text part of a large license text)"""

    # 'identified_as' value for unidentified snippets.
    UNIDENTIFIED_IDENTIFICATION: ClassVar[str] = "[UNIDENTIFIED]"

    # License name the classifier identified the snippet as (e.g. 'MIT').
    identified_as: str
    # Classifier confidence score for the identification.
    confidence: float
    # 1-based, inclusive line range of the snippet within the license text.
    start_line: int
    end_line: int

    # Policy condition (e.g. 'notice') assigned via set_conditions.
    condition: str = None
    # Conditions from overriding rules
    overriden_conditions: List[str] = None
    # Dependents that were not matched by any rule
    dependents_unmatched_by_overriding_rules: List[str] = None
    # all rules that matched this IdentifiedSnippet
    overriding_rules: List["ConditionOverrideRule"] = None

    # verification results
    verified: bool = None
    verification_message: str = None

    # checksum for snippet text
    snippet_checksum: str = None
    snippet_text: str = None

    # A suggested override rule
    suggested_override_rule: "ConditionOverrideRule" = None

    def create_empty(extracted_text_lines) -> "IdentifiedSnippet":
        """Creates an unidentified snippet covering all the given text lines."""
        return IdentifiedSnippet(
            identified_as=IdentifiedSnippet.UNIDENTIFIED_IDENTIFICATION,
            confidence=1.0,
            start_line=1,
            end_line=len(extracted_text_lines) + 1)

    def from_identify_license_dict(
            dictionary: Dict[str, Any], location: Any) -> "IdentifiedSnippet":
        """
        Create a IdentifiedSnippet instance from a dictionary in the output format of
        https://github.com/google/licenseclassifier/tree/main/tools/identify_license.

        i.e.
        {
            "Name": str
            "Confidence": int or float
            "StartLine": int
            "EndLine": int
        }
        """
        r = DictReader(dictionary, location)

        # Confidence could be an int or a float. Convert to a float.
        try:
            confidence = r.get('Confidence', expected_type=float)
        except LicenseException:
            confidence = float(r.get('Confidence', expected_type=int))

        return IdentifiedSnippet(
            identified_as=r.get('Name'),
            confidence=confidence,
            start_line=r.get('StartLine', expected_type=int),
            end_line=r.get('EndLine', expected_type=int))

    def to_json_dict(self):
        """Returns a json-serializable dict for this snippet."""
        # The fields are output in a certain order to produce a more readable output.
        out = {
            "identified_as": self.identified_as,
            "condition": self.condition,
            "verified": self.verified,
        }

        # Optional fields are only emitted when present, to keep the output lean.
        if self.verification_message:
            out["verification_message"] = self.verification_message
        if self.overriden_conditions:
            out["overriden_conditions"] = self.overriden_conditions
        if self.dependents_unmatched_by_overriding_rules:
            out["dependents_unmatched_by_overriding_rules"] = self.dependents_unmatched_by_overriding_rules
        if self.overriding_rules:
            out["overriding_rules"] = [
                r.to_json_dict() for r in self.overriding_rules
            ]
        if self.suggested_override_rule:
            out["suggested_override_rule"] = self.suggested_override_rule.to_json_dict(
            )

        out.update(
            {
                "confidence": self.confidence,
                "start_line": self.start_line,
                "end_line": self.end_line,
                "snippet_checksum": self.snippet_checksum,
                "snippet_text": self.snippet_text,
            })
        return out

    def from_json_dict(reader: "DictReader") -> "IdentifiedSnippet":
        """Creates an IdentifiedSnippet from a json dict reader (inverse of to_json_dict)."""
        suggested_override_rule = None
        if reader.exists("suggested_override_rule"):
            suggested_override_rule = ConditionOverrideRule.from_json_dict(
                reader.get_reader("suggested_override_rule"), reader.location)

        overriding_rules = None
        if reader.exists("overriding_rules"):
            overriding_rules = [
                ConditionOverrideRule.from_json_dict(r, reader.location)
                for r in reader.get_readers_list("overriding_rules")
            ]

        return IdentifiedSnippet(
            identified_as=reader.get("identified_as"),
            condition=reader.get("condition"),
            verified=reader.get_or("verified", default=False),
            verification_message=reader.get_or(
                "verification_message", default=None),
            overriden_conditions=reader.get_or(
                "overriden_conditions", default=None, expected_type=list),
            dependents_unmatched_by_overriding_rules=reader.get_or(
                "dependents_unmatched_by_overriding_rules",
                default=None,
                expected_type=list),
            overriding_rules=overriding_rules,
            suggested_override_rule=suggested_override_rule,
            confidence=reader.get("confidence", expected_type=float),
            start_line=reader.get("start_line", expected_type=int),
            end_line=reader.get("end_line", expected_type=int),
            snippet_checksum=reader.get("snippet_checksum"),
            snippet_text=reader.get("snippet_text"),
        )

    def number_of_lines(self):
        """Number of lines the snippet spans (the line range is inclusive)."""
        # Fixed: was `start_line - end_line + 1`, which yields negative counts.
        return self.end_line - self.start_line + 1

    def add_snippet_text(self, lines: List[str]):
        """Returns a copy with snippet_text and snippet_checksum computed from lines."""
        text = '\n'.join(lines[self.start_line - 1:self.end_line])
        checksum = md5(text.encode('utf-8')).hexdigest()
        return dataclasses.replace(
            self, snippet_text=text, snippet_checksum=checksum)

    def set_conditions(self, policy: "ConditionsPolicy"):
        """Returns a copy with the condition assigned by the given policy."""
        return dataclasses.replace(
            self, condition=policy.get_condition(self.identified_as))

    def override_conditions(
            self, license: "LicenseClassification",
            rules: List["ConditionOverrideRule"]):
        """Returns a copy with conditions overridden by all matching rules.

        Returns self unchanged when no rule matches this snippet.
        """
        all_matching_rules = []

        new_conditions = set()

        # License dependents not yet matched by any rule.
        remaining_dependents = set(license.dependents)
        for rule in rules:
            # Check that the filtering optimization in LicenseClassification
            # was applied.
            assert rule.match_license_names.matches(license.name)

            # Match identification, checksum, condition
            if not rule.match_identifications.matches(self.identified_as):
                continue
            if not rule.match_snippet_checksums.matches(self.snippet_checksum):
                continue
            if not rule.match_conditions.matches(self.condition):
                continue

            # Match dependents
            some_matching_dependents = rule.match_dependents.get_matches(
                license.dependents)
            if not some_matching_dependents:
                continue

            new_conditions.add(rule.override_condition_to)
            all_matching_rules.append(rule)
            for d in some_matching_dependents:
                remaining_dependents.discard(d)

        if all_matching_rules:
            return dataclasses.replace(
                self,
                overriden_conditions=sorted(new_conditions),
                dependents_unmatched_by_overriding_rules=sorted(
                    remaining_dependents),
                overriding_rules=all_matching_rules,
            )
        else:
            return self

    def verify_conditions(
            self, license: "LicenseClassification",
            allowed_conditions: Set[str]):
        """Sets the 'verified' and 'verification_message' fields.

        Attaches a suggested override rule when verification fails.
        """
        verified = True
        message = None
        disallowed_override_conditions = []
        if self.overriden_conditions:
            disallowed_override_conditions = [
                c for c in self.overriden_conditions
                if c not in allowed_conditions
            ]
        if not self.overriding_rules:
            # Simple case: No overriding rules were involved.
            if self.condition not in allowed_conditions:
                verified = False
                message = f"'{self.condition}' condition is not allowed"
        elif disallowed_override_conditions:
            # Some overriding rules were involved: Check their overriding conditions.
            rule_paths = [
                r.rule_file_path
                for r in self.overriding_rules
                if r.override_condition_to in disallowed_override_conditions
            ]
            verified = False
            message = f"The conditions {disallowed_override_conditions} are not allowed."\
                f" They were introduced by these rules: {rule_paths}."
        elif self.dependents_unmatched_by_overriding_rules:
            # Some license dependents didn't match any rule. Check the original
            # conditions.
            rule_paths = [r.rule_file_path for r in self.overriding_rules]
            if self.condition not in allowed_conditions:
                verified = False
                message = f"The overriding rules {rule_paths} changed the conditions to " \
                    f"{self.overriden_conditions} but the rules don't match the dependencies " \
                    f"{self.dependents_unmatched_by_overriding_rules} that remain with the " \
                    f"condition '{self.condition}' that is not allowed."

        if verified:
            assert message is None
            suggested_override_rule = None
        else:
            assert message is not None
            suggested_override_rule = ConditionOverrideRule.suggested_for_snippet(
                license, self, allowed_conditions)

        return dataclasses.replace(
            self,
            verified=verified,
            verification_message=message,
            suggested_override_rule=suggested_override_rule)

    def detailed_verification_message(
            self, license: "LicenseClassification") -> str:
        """Returns a very detailed verification failure message or None"""

        if self.verified:
            return None

        dependents_str = "\n".join([f"  {d}" for d in license.dependents])
        license_links = "\n".join([f"  {l}" for l in license.links])
        snippet = self.snippet_text
        if len(snippet) > 200:
            # Keep the report readable for very long snippets.
            snippet = snippet[0:200] + "<truncated>"

        message = f"""
License '{license.name}' has a snippet identified as '{self.identified_as}'.

License links:
{license_links}

The license is depended on by:
{dependents_str}

Snippet begin line: {self.start_line}
Snippet end line: {self.end_line}
Snippet checksum: {self.snippet_checksum}
Snippet: <begin>
{snippet}
<end>

Verification message:
{self.verification_message}

To fix this verification problem you should either:
1. Remove the dependency on projects with this license in the dependent code bases.
2. If the dependency is required and approved by the legal council of your project,
   you apply a local condition override, such as:
{json.dumps(self.suggested_override_rule.to_json_dict(), indent=4)}
"""
        return message
| |
| |
@dataclasses.dataclass(frozen=True)
class LicenseClassification:
    """Classification results for a single license"""

    # SPDX license id of the classified license.
    license_id: str
    # Snippets identified within the license's extracted text.
    identifications: List[IdentifiedSnippet]
    # License name from the SPDX data (populated by add_license_information).
    name: str = None
    # Cross-ref / see-also URLs from the SPDX data.
    links: List[str] = None
    # Dependency chains (">"-joined package names) leading to this license.
    dependents: List[str] = None

    # license size & identification stats
    size_bytes: int = None
    size_lines: int = None
    unidentified_lines: int = None

    def to_json_dict(self):
        """Returns a json-serializable dict for this classification."""
        out = {
            "license_id": self.license_id,
            "name": self.name,
            "links": self.links,
            "dependents": self.dependents,
            "identifications": [m.to_json_dict() for m in self.identifications],
            "identification_stats":
                {
                    "size_bytes": self.size_bytes,
                    "size_lines": self.size_lines,
                    "unidentified_lines": self.unidentified_lines,
                },
        }

        return out

    def from_json_dict(reader: DictReader) -> "LicenseClassification":
        """Creates a LicenseClassification from a json dict reader.

        Reads the layout produced by to_json_dict.
        """
        identifications = [
            IdentifiedSnippet.from_json_dict(r)
            for r in reader.get_readers_list("identifications")
        ]
        stats_reader = reader.get_reader("identification_stats")

        return LicenseClassification(
            license_id=reader.get("license_id"),
            name=reader.get("name"),
            links=reader.get_string_list("links"),
            dependents=reader.get_string_list("dependents"),
            identifications=identifications,
            # Stats are optional: default to None when missing or null.
            size_bytes=stats_reader.get_or(
                "size_bytes", default=None, expected_type=int,
                accept_none=True),
            size_lines=stats_reader.get_or(
                "size_lines", default=None, expected_type=int,
                accept_none=True),
            unidentified_lines=stats_reader.get_or(
                "unidentified_lines",
                default=None,
                expected_type=int,
                accept_none=True),
        )

    def add_license_information(self, index: SpdxIndex):
        """Returns a copy enriched with name, links, dependents and snippet texts
        looked up from the given SPDX index."""
        spdx_license = index.get_license_by_id(self.license_id)
        snippet_lines = spdx_license.extracted_text_lines()
        identifications = [
            i.add_snippet_text(snippet_lines) for i in self.identifications
        ]
        links = []
        if spdx_license.cross_refs:
            links.extend(spdx_license.cross_refs)
        if spdx_license.see_also:
            links.extend(spdx_license.see_also)
        chains = index.dependency_chains_for_license(spdx_license.license_id)
        dependents = [">".join([p.name for p in chain]) for chain in chains]
        # Sort and dedup dependent chains: There might be duplicate chains since
        # the package names are not globally unique.
        dependents = sorted(set(dependents))
        return dataclasses.replace(
            self,
            identifications=identifications,
            name=spdx_license.name,
            links=links,
            dependents=dependents,
        )

    def compute_identification_stats(self, index: SpdxIndex):
        """Returns a copy with size and unidentified-line stats computed from
        the license's extracted text in the SPDX index."""
        spdx_license = index.get_license_by_id(self.license_id)

        extracted_text = spdx_license.extracted_text
        extracted_lines = spdx_license.extracted_text_lines()

        # Lines covered by identified snippets; the remainder is unidentified.
        lines_identified = 0
        for identification in self.identifications:
            lines_identified += identification.number_of_lines()

        return dataclasses.replace(
            self,
            size_bytes=len(extracted_text),
            size_lines=len(extracted_lines),
            unidentified_lines=len(extracted_lines) - lines_identified,
        )

    def _transform_identifications(self, function) -> "LicenseClassification":
        """Returns a copy of this object with the identifications transformed by function"""
        return dataclasses.replace(
            self, identifications=[function(i) for i in self.identifications])

    def set_conditions(self, policy: "ConditionsPolicy"):
        """Returns a copy with each identification's condition set per the policy."""
        return self._transform_identifications(
            lambda x: x.set_conditions(policy))

    def override_conditions(self, rule_set: "ConditionOverrideRuleSet"):
        """Returns a copy with conditions overridden by the matching rules,
        or self when no rule is relevant to this license."""
        # Optimize by filtering rules that match the license name and any dependents
        relevant_rules = []
        for rule in rule_set.rules:
            if rule.match_license_names.matches(self.name):
                if rule.match_dependents.matches_any(self.dependents):
                    relevant_rules.append(rule)

        if relevant_rules:
            return self._transform_identifications(
                lambda x: x.override_conditions(self, relevant_rules))
        else:
            return self

    def verify_conditions(self, allowed_conditions: Set[str]):
        """Returns a copy with each identification's verification fields set."""
        return self._transform_identifications(
            lambda x: x.verify_conditions(self, allowed_conditions))

    def verification_errors(self) -> List[str]:
        """Detailed failure messages for all unverified identifications."""
        out = []
        for i in self.identifications:
            msg = i.detailed_verification_message(self)
            if msg:
                out.append(msg)
        return out
| |
| |
@dataclasses.dataclass(frozen=True)
class LicensesClassifications:
    """All license classifications, keyed by license id, with bulk operations."""

    classifications_by_id: Dict[str, "LicenseClassification"]

    def create_empty() -> "LicensesClassifications":
        """Creates a collection with no classifications."""
        # Fixed: the return annotation previously said "LicenseClassification".
        return LicensesClassifications(classifications_by_id={})

    def from_identify_license_output_json(
            identify_license_output_path: str,
            license_paths_by_license_id: Dict[str,
                                              str]) -> "LicensesClassifications":
        """Parses the json output of the identify_license tool into classifications.

        Args:
            identify_license_output_path: Path to the tool's json output file.
            license_paths_by_license_id: Maps each license id to the text file
                the tool classified for that license.
        """
        # Fixed: use a context manager so the file handle is closed promptly.
        with open(identify_license_output_path, 'r') as json_file:
            json_output = json.load(json_file)
        # Expected results from https://github.com/google/licenseclassifier/tree/main/tools/identify_license
        # have the following json layout:
        # [
        #     {
        #        "Filepath": ...
        #        "Classifications: [
        #            {
        #                 "Name": ...
        #                 "Confidence": int or float
        #                 "StartLine": int
        #                 "EndLine": int
        #            },
        #            { ...},
        #            ...
        #        ]
        #     },
        #     { ... },
        #     ...
        # ]

        results_by_file_path = {}
        for one_output in json_output:
            file_name = one_output['Filepath']
            # Skip the placeholder file injected to work around b/258523163.
            if file_name == b258523163_workaround:
                continue
            results_by_file_path[file_name] = one_output['Classifications']

        identifications_by_license_id = defaultdict(list)
        for license_id, file_name in license_paths_by_license_id.items():
            if file_name in results_by_file_path:
                for match_json in results_by_file_path[file_name]:
                    identifications_by_license_id[license_id].append(
                        IdentifiedSnippet.from_identify_license_dict(
                            dictionary=match_json,
                            location=identify_license_output_path))
        license_classifications = {}
        for license_id, identifications in identifications_by_license_id.items(
        ):
            license_classifications[license_id] = LicenseClassification(
                license_id=license_id, identifications=identifications)

        return LicensesClassifications(license_classifications)

    def to_json_list(self) -> List[Any]:
        """Returns a json-serializable list of classifications, ordered by id."""
        output = []
        for license_id in sorted(self.classifications_by_id.keys()):
            output.append(self.classifications_by_id[license_id].to_json_dict())
        return output

    def to_json(self, json_file_path: str):
        """Writes the classifications to the given path as a json list."""
        with open(json_file_path, 'w') as output_file:
            json.dump(self.to_json_list(), output_file, indent=4)

    def from_json_list(
            input: List[Any], location: str) -> "LicensesClassifications":
        """Creates a LicensesClassifications from a list of json dict values.

        Raises:
            LicenseException: unexpected json structure or duplicate license ids.
        """
        if not isinstance(input, list):
            raise LicenseException(
                f"Expected a list of classification json values, but got {type(input)}",
                location)
        classifications_by_id = {}
        for value in input:
            if not isinstance(value, dict):
                # Fixed: the message previously printed type(input), not the
                # offending element's type.
                raise LicenseException(
                    f"Expected json dict but got {type(value)}", location)
            value_reader = DictReader(value, location)
            classification = LicenseClassification.from_json_dict(value_reader)
            if classification.license_id in classifications_by_id:
                raise LicenseException(
                    f"Multiple classifications with license_id '{classification.license_id}'",
                    location)
            classifications_by_id[classification.license_id] = classification

        return LicensesClassifications(classifications_by_id)

    def from_json(json_file_path: str) -> "LicensesClassifications":
        """Loads classifications from a json file written by to_json."""
        with open(json_file_path, "r") as f:
            try:
                json_obj = json.load(f)
            except json.decoder.JSONDecodeError as e:
                # Chain the decode error so the parse details are preserved.
                raise LicenseException(
                    f"Failed to parse json: {e}", json_file_path) from e
        return LicensesClassifications.from_json_list(
            json_obj, json_file_path)

    def _transform(
        self, function: Callable[["LicenseClassification"],
                                 "LicenseClassification"]
    ) -> "LicensesClassifications":
        """Returns a copy of this object with the classifications transformed by function"""
        new = self.classifications_by_id.copy()
        for k, v in new.items():
            new[k] = function(v)
        return dataclasses.replace(self, classifications_by_id=new)

    def set_conditions(
            self, policy: "ConditionsPolicy") -> "LicensesClassifications":
        """Returns a copy with conditions assigned per the given policy."""
        return self._transform(lambda x: x.set_conditions(policy))

    def add_classifications(
            self,
            to_add: List["LicenseClassification"]) -> "LicensesClassifications":
        """Returns a copy with the given classifications added (ids must be new)."""
        new = self.classifications_by_id.copy()
        for license_classification in to_add:
            license_id = license_classification.license_id
            assert license_id not in new, f"{license_id} already exists"
            new[license_id] = license_classification
        return dataclasses.replace(self, classifications_by_id=new)

    def add_licenses_information(self, spdx_index: "SpdxIndex"):
        """Returns a copy enriched with names, links and dependents from the index."""
        return self._transform(lambda x: x.add_license_information(spdx_index))

    def compute_identification_stats(self, spdx_index: "SpdxIndex"):
        """Returns a copy with per-license identification stats computed."""
        return self._transform(
            lambda x: x.compute_identification_stats(spdx_index))

    def override_conditions(
            self,
            rule_set: "ConditionOverrideRuleSet") -> "LicensesClassifications":
        """Returns a copy with conditions overridden by the given rule set."""
        return self._transform(lambda x: x.override_conditions(rule_set))

    def verify_conditions(
            self, allowed_conditions: Set[str]) -> "LicensesClassifications":
        """Returns a copy with verification results for every identification."""
        return self._transform(
            lambda x: x.verify_conditions(allowed_conditions))

    def verification_errors(self):
        """Detailed failure messages across all classifications."""
        error_messages = []
        for c in self.classifications_by_id.values():
            error_messages.extend(c.verification_errors())
        return error_messages

    def identifications_count(self):
        """Total number of identified snippets across all licenses."""
        c = 0
        for v in self.classifications_by_id.values():
            c += len(v.identifications)
        return c

    def failed_verifications_count(self):
        """Number of snippets whose 'verified' field is falsy (False or None)."""
        c = 0
        for v in self.classifications_by_id.values():
            for i in v.identifications:
                if not i.verified:
                    c += 1
        return c

    def licenses_count(self):
        """Number of classified licenses."""
        return len(self.classifications_by_id)

    def license_ids(self):
        """The ids of all classified licenses."""
        return self.classifications_by_id.keys()
| |
| |
@dataclasses.dataclass(frozen=True)
class ConditionsPolicy:
    """
    A map of identification names (e.g. MIT, GPL) to policy condition names
    (e.g. notice, by_exception_only).
    """
    _condition_by_name: Dict[str, str]
    _default_condition: str

    def from_csv_file(csv_file_path, default_condition):
        """
        Creates a ConditionsPolicy from a policy conditions csv file.

        The file has 2 columns: license (name), condition
        """
        conditions: Dict[str, str] = {}

        with open(csv_file_path, 'r') as read_obj:
            for row in csv.DictReader(read_obj):
                name = row["license"]
                # Duplicate license names in the csv indicate a policy error.
                assert name not in conditions, f"{name} already defined"
                conditions[name] = row["condition"]
        return ConditionsPolicy(conditions, default_condition)

    def get_condition(self, identification_name: str):
        """The condition mapped to the name, or the default condition."""
        return self._condition_by_name.get(
            identification_name, self._default_condition)
| |
| |
@dataclasses.dataclass(frozen=True)
class AsterixStringExpression:
    """Utility for partial string matching (asterix matches)"""
    starts_with_asterix: bool
    ends_with_asterix: bool
    parts: List[str]

    def create(expression: str) -> "AsterixStringExpression":
        """Parses a '*'-delimited expression into its literal fragments."""
        fragments = [piece for piece in expression.split("*") if piece]
        return AsterixStringExpression(
            starts_with_asterix=expression.startswith("*"),
            ends_with_asterix=expression.endswith("*"),
            parts=fragments,
        )

    def matches(self, value) -> bool:
        """Returns True when value contains every fragment in order, anchored
        at the start/end unless the expression had a leading/trailing '*'."""
        fragments = self.parts
        if not fragments:
            # Expression was only asterisks: matches anything.
            return True

        if not (self.starts_with_asterix or value.startswith(fragments[0])):
            return False

        cursor = 0
        for fragment in fragments:
            found_at = value.find(fragment, cursor)
            if found_at < 0:
                return False
            cursor = found_at + len(fragment)

        return self.ends_with_asterix or cursor == len(value)
| |
| |
@dataclasses.dataclass(frozen=True)
class StringMatcher:
    """
    A utility to perform override rule string matching.

    Supports exact and * matches.
    """

    # The original expressions verbatim (round-tripped via to_json).
    all_expressions: List[str]

    exact_expressions: Set[str]
    asterix_expressions: List[AsterixStringExpression]

    def create(expressions: List[str]) -> "StringMatcher":
        """Builds a matcher, splitting expressions into exact and '*' forms."""
        assert isinstance(expressions, list)
        exact = set()
        wildcards = []
        for expression in expressions:
            assert isinstance(expression, str)
            if "*" not in expression:
                exact.add(expression)
            else:
                wildcards.append(AsterixStringExpression.create(expression))

        return StringMatcher(
            all_expressions=expressions,
            exact_expressions=exact,
            asterix_expressions=wildcards)

    def create_match_everything() -> "StringMatcher":
        """A matcher that matches any string."""
        return StringMatcher.create(["*"])

    def to_json(self) -> Any:
        """The original expression list, as stored in rule json files."""
        return self.all_expressions

    def matches(self, input: str) -> bool:
        """Returns True when the input matches any of the expressions."""
        if input in self.exact_expressions:
            return True
        return any(
            expression.matches(input)
            for expression in self.asterix_expressions)

    def get_matches(self, inputs: List[str]) -> List[str]:
        """
        Matches all the inputs against the internal expressions.

        Returns the ones that match or an empty list if none matched.
        """
        return [candidate for candidate in inputs if self.matches(candidate)]

    def matches_any(self, inputs: List[str]) -> bool:
        """
        Matches all the inputs against the internal expressions.

        Returns true if any inputs were matched; vacuously false when either
        the expressions or the inputs are empty.
        """
        if not (self.all_expressions and inputs):
            return False
        return any(self.matches(candidate) for candidate in inputs)

    def matches_all(self, inputs: List[str]) -> bool:
        """
        Matches all the inputs against the internal expressions.

        Returns true if all inputs were matched; vacuously false when either
        the expressions or the inputs are empty.
        """
        if not (self.all_expressions and inputs):
            return False
        return all(self.matches(candidate) for candidate in inputs)
| |
| |
@dataclasses.dataclass(frozen=True)
class ConditionOverrideRule:
    """Rule for overriding a classified license condition"""

    # Path to the condition override rule.
    rule_file_path: str
    # Will override the condition to this condition
    override_condition_to: str
    # Tracking bug URL; required to be non-empty.
    bug: str
    # List facilitates easier to read multi-line comments in JSON.
    comment: List[str]

    # matching: a snippet must satisfy ALL of these matchers to be overridden.
    match_license_names: "StringMatcher"
    match_identifications: "StringMatcher"
    match_conditions: "StringMatcher"
    match_dependents: "StringMatcher"
    match_snippet_checksums: "StringMatcher"

    def from_json_dict(dictionary, rule_file_path) -> "ConditionOverrideRule":
        """Creates a ConditionOverrideRule from a json dict (or a DictReader).

        Args:
            dictionary: Either a DictReader or a plain json dict.
            rule_file_path: Path of the file the rule was read from.
        Raises:
            LicenseException: when required fields are missing or invalid.
        """
        if isinstance(dictionary, DictReader):
            reader = dictionary
        else:
            reader = DictReader(dictionary=dictionary, location=rule_file_path)

        override_condition_to = reader.get("override_condition_to")
        bug = reader.get("bug")
        if not bug:
            raise LicenseException(
                "'bug' fields cannot be empty", rule_file_path)
        comment = reader.get("comment", expected_type=list)

        def verify_list_not_empty(list_value) -> str:
            # Returns an error string, or None when the list is valid.
            if not list_value:
                return "list is empty"
            for v in list_value:
                if not v:
                    return "empty value in list"
            return None

        criteria_reader = reader.get_reader("match_criteria")

        def read_required_matcher_field(name) -> "StringMatcher":
            value = criteria_reader.get(
                name, expected_type=list, verify=verify_list_not_empty)
            return StringMatcher.create(value)

        match_license_names = read_required_matcher_field("license_names")
        match_conditions = read_required_matcher_field("conditions")
        match_dependents = read_required_matcher_field("dependents")
        match_identifications = read_required_matcher_field("identifications")

        # Checksum matching is optional except for unidentified snippets,
        # whose identification alone cannot pin down a specific snippet.
        match_snippet_checksums = criteria_reader.get_or(
            "snippet_checksums", expected_type=list, default=None)

        if match_identifications.matches(
                IdentifiedSnippet.UNIDENTIFIED_IDENTIFICATION):
            # Fixed: both messages below previously named the wrong fields
            # ("license_names" / "match_snippet_checksum") and had missing
            # spacing/verbs.
            if not match_snippet_checksums:
                raise LicenseException(
                    "Rules whose `identifications` match "
                    f"`{IdentifiedSnippet.UNIDENTIFIED_IDENTIFICATION}` "
                    "must also set `snippet_checksums`", rule_file_path)
            if any("*" in s for s in match_snippet_checksums):
                raise LicenseException(
                    "Rules whose `identifications` match "
                    f"`{IdentifiedSnippet.UNIDENTIFIED_IDENTIFICATION}` "
                    "cannot have `*` expressions in `snippet_checksums`",
                    rule_file_path)
        if match_snippet_checksums is None:
            match_snippet_checksums = StringMatcher.create_match_everything()
        else:
            match_snippet_checksums = StringMatcher.create(
                match_snippet_checksums)

        # If there is a rule_file_path value in the dict, use it instead.
        rule_file_path = reader.get_or("rule_file_path", default=rule_file_path)

        return ConditionOverrideRule(
            rule_file_path=rule_file_path,
            override_condition_to=override_condition_to,
            bug=bug,
            comment=comment,
            match_license_names=match_license_names,
            match_identifications=match_identifications,
            match_conditions=match_conditions,
            match_dependents=match_dependents,
            match_snippet_checksums=match_snippet_checksums,
        )

    def to_json_dict(self):
        """Returns a json-serializable dict for this rule."""
        # Fields are output in a certain order for better readability
        out = {}
        if self.rule_file_path:
            out["rule_file_path"] = self.rule_file_path

        out.update(
            {
                "override_condition_to": self.override_condition_to,
                "bug": self.bug,
                "comment": self.comment,
                "match_criteria":
                    {
                        "license_names":
                            self.match_license_names.to_json(),
                        "identifications":
                            self.match_identifications.to_json(),
                        "conditions":
                            self.match_conditions.to_json(),
                        "snippet_checksums":
                            self.match_snippet_checksums.to_json(),
                        "dependents":
                            self.match_dependents.to_json(),
                    },
            })
        return out

    def suggested_for_snippet(
            license: "LicenseClassification", snippet: "IdentifiedSnippet",
            allowed_conditions: Set[str]) -> "ConditionOverrideRule":
        """Creates an override rule suggestion for the given license snippet"""
        dependents = license.dependents
        # Narrow the suggestion to the dependents that no existing rule covers.
        if snippet.dependents_unmatched_by_overriding_rules:
            dependents = snippet.dependents_unmatched_by_overriding_rules
        return ConditionOverrideRule(
            rule_file_path=None,
            override_condition_to="<CHOOSE ONE OF " +
            ", ".join([f"'{c}'" for c in allowed_conditions]) + ">",
            bug="<INSERT TICKET URL>",
            comment=["<INSERT DOCUMENTATION FOR OVERRIDE RULE>"],
            match_license_names=StringMatcher.create([license.name]),
            match_snippet_checksums=StringMatcher.create(
                [snippet.snippet_checksum]),
            match_identifications=StringMatcher.create([snippet.identified_as]),
            match_conditions=StringMatcher.create([snippet.condition]),
            match_dependents=StringMatcher.create(dependents))
| |
| |
@dataclasses.dataclass(frozen=True)
class ConditionOverrideRuleSet:
    """A collection of ConditionOverrideRule loaded from rule files."""

    rules: List["ConditionOverrideRule"]

    def merge(
            self,
            other: "ConditionOverrideRuleSet") -> "ConditionOverrideRuleSet":
        """Returns a new rule set containing the rules of both sets."""
        new = list(self.rules)
        new.extend(other.rules)
        return dataclasses.replace(self, rules=new)

    def from_json(file_path: str) -> "ConditionOverrideRuleSet":
        """Loads a rule set from a json file.

        The file may contain either a single rule dict or a list of rule dicts.

        Raises:
            LicenseException: malformed json or unexpected json structure.
        """
        with open(file_path, "r") as f:
            try:
                json_obj = json.load(f)
            except json.decoder.JSONDecodeError as e:
                # Chain the decode error so the parse details are preserved.
                raise LicenseException(
                    f"Failed to parse json: {e}", file_path) from e

        if not isinstance(json_obj, (list, dict)):
            raise LicenseException(
                f"Expected List[dict] or dict at top-level json but found {type(json_obj)}",
                file_path)

        # A single rule dict is treated as a list of one rule.
        if isinstance(json_obj, dict):
            json_obj = [json_obj]

        rules = []
        for child_json in json_obj:
            if not isinstance(child_json, dict):
                raise LicenseException(
                    f"Expected dict but found {type(child_json)}",
                    file_path)
            rules.append(
                ConditionOverrideRule.from_json_dict(
                    DictReader(child_json, file_path),
                    rule_file_path=file_path))

        return ConditionOverrideRuleSet(rules)