Merge pull request #63 from aiuto/pinfo

Add package_info rule and a new gatherer to collect it.
diff --git a/BUILD b/BUILD
index 98caf14..40cfcad 100644
--- a/BUILD
+++ b/BUILD
@@ -13,9 +13,11 @@
 # limitations under the License.
 
 load("@rules_license//rules:license.bzl", "license")
+load("@rules_license//rules:package_info.bzl", "package_info")
+load("@rules_license//:version.bzl", "version")
 
 package(
-    default_applicable_licenses = [":license"],
+    default_applicable_licenses = [":license", ":package_info"],
     default_visibility = ["//visibility:public"],
 )
 
@@ -29,6 +31,12 @@
     license_text = "LICENSE",
 )
 
+package_info(
+    name = "package_info",
+    package_name = "rules_license",
+    package_version = version,
+)
+
 exports_files(
     ["LICENSE", "WORKSPACE"],
     visibility = ["//visibility:public"],
diff --git a/examples/sboms/BUILD b/examples/sboms/BUILD
new file mode 100644
index 0000000..0c31a04
--- /dev/null
+++ b/examples/sboms/BUILD
@@ -0,0 +1,13 @@
+# Demonstrate the generate_sbom rule
+
+load("@rules_license//rules:sbom.bzl", "generate_sbom")
+
+# There are not a lot of targets in this rule set to build a SBOM from
+# so we will (in a very self-referential way) generate one for the tool
+# which generates the SBOMs
+# See the output in bazel-bin/examples/sboms/write_sbom.txt
+generate_sbom(
+    name = "write_sbom_sbom",
+    out = "write_sbom.txt",
+    deps = ["//tools:write_sbom"],
+)
diff --git a/rules/gather_licenses_info.bzl b/rules/gather_licenses_info.bzl
index a5f1a41..b676972 100644
--- a/rules/gather_licenses_info.bzl
+++ b/rules/gather_licenses_info.bzl
@@ -16,7 +16,7 @@
 load(
     "@rules_license//rules:licenses_core.bzl",
     "TraceInfo",
-    "gather_licenses_info_common",
+    "gather_metadata_info_common",
     "should_traverse",
 )
 load(
@@ -41,7 +41,7 @@
     return s
 
 def _gather_licenses_info_impl(target, ctx):
-    return gather_licenses_info_common(target, ctx, TransitiveLicensesInfo, NAMESPACES, should_traverse)
+    return gather_metadata_info_common(target, ctx, TransitiveLicensesInfo, NAMESPACES, [], should_traverse)
 
 gather_licenses_info = aspect(
     doc = """Collects LicenseInfo providers into a single TransitiveLicensesInfo provider.""",
diff --git a/rules/gather_metadata.bzl b/rules/gather_metadata.bzl
new file mode 100644
index 0000000..9e96cba
--- /dev/null
+++ b/rules/gather_metadata.bzl
@@ -0,0 +1,302 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Rules and macros for collecting LicenseInfo and other metadata providers."""
+
+load(
+    "@rules_license//rules:licenses_core.bzl",
+    "TraceInfo",
+    "gather_metadata_info_common",
+    "should_traverse",
+)
+load(
+    "@rules_license//rules:providers.bzl",
+    "MetadataInfo",
+    "PackageInfo",
+    "TransitiveMetadataInfo",
+)
+
+# Definition for compliance namespace, used for filtering licenses
+# based on the namespace to which they belong.
+NAMESPACES = ["compliance"]
+
+def _strip_null_repo(label):
+    """Removes the null repo name (e.g. @//) from a string.
+
+    This is to make str(label) compatible between bazel 5.x and 6.x
+    """
+    s = str(label)
+    if s.startswith('@//'):
+        return s[1:]
+    elif s.startswith('@@//'):
+        return s[2:]
+    return s
+
+def _bazel_package(label):
+    l = _strip_null_repo(label)
+    return l[0:-(len(label.name) + 1)]
+
+def _gather_metadata_info_impl(target, ctx):
+    return gather_metadata_info_common(target, ctx, TransitiveMetadataInfo, NAMESPACES, [MetadataInfo, PackageInfo], should_traverse)
+
+gather_metadata_info = aspect(
+    doc = """Collects LicenseInfo providers into a single TransitiveMetadataInfo provider.""",
+    implementation = _gather_metadata_info_impl,
+    attr_aspects = ["*"],
+    attrs = {
+        "_trace": attr.label(default = "@rules_license//rules:trace_target"),
+    },
+    provides = [TransitiveMetadataInfo],
+    apply_to_generating_rules = True,
+)
+
+def _write_metadata_info_impl(target, ctx):
+    """Write transitive license info into a JSON file
+
+    Args:
+      target: The target of the aspect.
+      ctx: The aspect evaluation context.
+
+    Returns:
+      OutputGroupInfo
+    """
+
+    if not TransitiveMetadataInfo in target:
+        return [OutputGroupInfo(licenses = depset())]
+    info = target[TransitiveMetadataInfo]
+    outs = []
+
+    # If the result doesn't contain licenses, we simply return the provider
+    if not hasattr(info, "target_under_license"):
+        return [OutputGroupInfo(licenses = depset())]
+
+    # Write the output file for the target
+    name = "%s_metadata_info.json" % ctx.label.name
+    content = "[\n%s\n]\n" % ",\n".join(metadata_info_to_json(info))
+    out = ctx.actions.declare_file(name)
+    ctx.actions.write(
+        output = out,
+        content = content,
+    )
+    outs.append(out)
+
+    if ctx.attr._trace[TraceInfo].trace:
+        trace = ctx.actions.declare_file("%s_trace_info.json" % ctx.label.name)
+        ctx.actions.write(output = trace, content = "\n".join(info.traces))
+        outs.append(trace)
+
+    return [OutputGroupInfo(licenses = depset(outs))]
+
+gather_metadata_info_and_write = aspect(
+    doc = """Collects TransitiveMetadataInfo providers and writes JSON representation to a file.
+
+    Usage:
+      bazel build //some:target \
+          --aspects=@rules_license//rules:gather_metadata.bzl%gather_metadata_info_and_write \
+          --output_groups=licenses
+    """,
+    implementation = _write_metadata_info_impl,
+    attr_aspects = ["*"],
+    attrs = {
+        "_trace": attr.label(default = "@rules_license//rules:trace_target"),
+    },
+    provides = [OutputGroupInfo],
+    requires = [gather_metadata_info],  # supplies the TransitiveMetadataInfo read by the impl
+    apply_to_generating_rules = True,
+)
+
+def write_metadata_info(ctx, deps, json_out):
+    """Writes TransitiveMetadataInfo providers for a set of targets as JSON.
+
+    TODO(aiuto): Document JSON schema. But it is under development, so the current
+    best place to look is at tests/hello_licenses.golden.
+
+    Usage:
+      write_metadata_info must be called from a rule implementation, where the
+      rule has run the gather_metadata_info aspect on its deps to
+      collect the transitive closure of LicenseInfo and other metadata
+      providers into a TransitiveMetadataInfo provider.
+
+      foo = rule(
+        implementation = _foo_impl,
+        attrs = {
+           "deps": attr.label_list(aspects = [gather_metadata_info])
+        }
+      )
+
+      def _foo_impl(ctx):
+        ...
+        out = ctx.actions.declare_file("%s_licenses.json" % ctx.label.name)
+        write_metadata_info(ctx, ctx.attr.deps, out)
+
+    Args:
+      ctx: context of the caller
+      deps: a list of deps which should have TransitiveMetadataInfo providers.
+            This requires that you have run the gather_metadata_info
+            aspect over them
+      json_out: output handle to write the JSON info
+    """
+    licenses = []
+    for dep in deps:
+        if TransitiveMetadataInfo in dep:
+            licenses.extend(metadata_info_to_json(dep[TransitiveMetadataInfo]))
+    ctx.actions.write(
+        output = json_out,
+        content = "[\n%s\n]\n" % ",\n".join(licenses),
+    )
+
+def metadata_info_to_json(metadata_info):
+    """Render a single TransitiveMetadataInfo provider to JSON
+
+    Args:
+      metadata_info: A TransitiveMetadataInfo.
+
+    Returns:
+      [(str)] list containing the provider's contents rendered as one JSON object.
+    """
+
+    main_template = """  {{
+    "top_level_target": "{top_level_target}",
+    "dependencies": [{dependencies}
+    ],
+    "licenses": [{licenses}
+    ],
+    "packages": [{packages}
+    ]\n  }}"""
+
+    dep_template = """
+      {{
+        "target_under_license": "{target_under_license}",
+        "licenses": [
+          {licenses}
+        ]
+      }}"""
+
+    license_template = """
+      {{
+        "label": "{label}",
+        "bazel_package": "{bazel_package}",
+        "license_kinds": [{kinds}
+        ],
+        "copyright_notice": "{copyright_notice}",
+        "package_name": "{package_name}",
+        "package_url": "{package_url}",
+        "package_version": "{package_version}",
+        "license_text": "{license_text}",
+        "used_by": [
+          {used_by}
+        ]
+      }}"""
+
+    kind_template = """
+          {{
+            "target": "{kind_path}",
+            "name": "{kind_name}",
+            "conditions": {kind_conditions}
+          }}"""
+
+    package_info_template = """
+          {{
+            "target": "{label}",
+            "bazel_package": "{bazel_package}",
+            "package_name": "{package_name}",
+            "package_url": "{package_url}",
+            "package_version": "{package_version}"
+          }}"""
+
+    # Build reverse map of license to user
+    used_by = {}
+    for dep in metadata_info.deps.to_list():
+        # Undo the concatenation applied when stored in the provider.
+        dep_licenses = dep.licenses.split(",")
+        for license in dep_licenses:
+            if license not in used_by:
+                used_by[license] = []
+            used_by[license].append(_strip_null_repo(dep.target_under_license))
+
+    all_licenses = []
+    for license in sorted(metadata_info.licenses.to_list(), key = lambda x: x.label):
+        kinds = []
+        for kind in sorted(license.license_kinds, key = lambda x: x.name):
+            kinds.append(kind_template.format(
+                kind_name = kind.name,
+                kind_path = kind.label,
+                kind_conditions = kind.conditions,
+            ))
+
+        if license.license_text:
+            # Special handling for synthetic LicenseInfo
+            text_path = (license.license_text.package + "/" + license.license_text.name if type(license.license_text) == "Label" else license.license_text.path)
+            all_licenses.append(license_template.format(
+                copyright_notice = license.copyright_notice,
+                kinds = ",".join(kinds),
+                license_text = text_path,
+                package_name = license.package_name,
+                package_url = license.package_url,
+                package_version = license.package_version,
+                label = _strip_null_repo(license.label),
+                bazel_package =  _bazel_package(license.label),
+                used_by = ",\n          ".join(sorted(['"%s"' % x for x in used_by[str(license.label)]])),
+            ))
+
+    all_deps = []
+    for dep in sorted(metadata_info.deps.to_list(), key = lambda x: x.target_under_license):
+        metadata_used = []
+
+        # Undo the concatenation applied when stored in the provider.
+        dep_licenses = dep.licenses.split(",")
+        all_deps.append(dep_template.format(
+            target_under_license = _strip_null_repo(dep.target_under_license),
+            licenses = ",\n          ".join(sorted(['"%s"' % _strip_null_repo(x) for x in dep_licenses])),
+        ))
+
+    all_packages = []
+    # We would use this if we had distinct depsets for every provider type.
+    #for package in sorted(metadata_info.package_info.to_list(), key = lambda x: x.label):
+    #    all_packages.append(package_info_template.format(
+    #        label = _strip_null_repo(package.label),
+    #        package_name = package.package_name,
+    #        package_url = package.package_url,
+    #        package_version = package.package_version,
+    #    ))
+
+    for mi in sorted(metadata_info.other_metadata.to_list(), key = lambda x: x.label):
+        # Maybe use a map of provider class to formatter.  A generic dict->json function
+        # in starlark would help
+
+        # This format is for using distinct providers.  I like the compile time safety.
+        if mi.type == "package_info":
+            all_packages.append(package_info_template.format(
+                label = _strip_null_repo(mi.label),
+                bazel_package =  _bazel_package(mi.label),
+                package_name = mi.package_name,
+                package_url = mi.package_url,
+                package_version = mi.package_version,
+            ))
+        # experimental: Support the MetadataInfo bag of data
+        if mi.type == "package_info_alt":
+            all_packages.append(package_info_template.format(
+                label = _strip_null_repo(mi.label),
+                bazel_package =  _bazel_package(mi.label),
+                # data is just a bag, so we need to use get() or ""
+                package_name = mi.data.get("package_name") or "",
+                package_url = mi.data.get("package_url") or "",
+                package_version = mi.data.get("package_version") or "",
+            ))
+
+    return [main_template.format(
+        top_level_target = _strip_null_repo(metadata_info.target_under_license),
+        dependencies = ",".join(all_deps),
+        licenses = ",".join(all_licenses),
+        packages = ",".join(all_packages),
+    )]
diff --git a/rules/licenses_core.bzl b/rules/licenses_core.bzl
index 42702bd..cf476a4 100644
--- a/rules/licenses_core.bzl
+++ b/rules/licenses_core.bzl
@@ -19,6 +19,7 @@
     "@rules_license//rules:providers.bzl",
     "LicenseInfo",
     "LicensedTargetInfo",
+    "TransitiveLicensesInfo",
 )
 
 
@@ -66,7 +67,7 @@
 
     return True
 
-def _get_transitive_licenses(ctx, trans_licenses, trans_deps, traces, provider, filter_func):
+def _get_transitive_metadata(ctx, trans_licenses, trans_other_metadata, trans_package_info, trans_deps, traces, provider, filter_func):
     attrs = [a for a in dir(ctx.rule.attr)]
     for name in attrs:
         if not filter_func(ctx, name):
@@ -96,8 +97,21 @@
                     for trace in info.traces:
                         traces.append("(" + ", ".join([str(ctx.label), ctx.rule.kind, name]) + ") -> " + trace)
 
-def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filter_func):
-    """Collect license info from myself and my deps.
+                # We only need one or the other of these stanzas.
+                # If we use a polymorphic approach to metadata providers, then
+                # this works.
+                if hasattr(info, "other_metadata"):
+                    if info.other_metadata:
+                        trans_other_metadata.append(info.other_metadata)
+                # But if we want more precise type safety, we would have a
+                # trans_* for each type of metadata. That is not user
+                # extensible.
+                if hasattr(info, "package_info"):
+                    if info.package_info:
+                        trans_package_info.append(info.package_info)
+
+def gather_metadata_info_common(target, ctx, provider_factory, namespaces, metadata_providers, filter_func):
+    """Collect license and other metadata info from myself and my deps.
 
     Any single target might directly depend on a license, or depend on
     something that transitively depends on a license, or neither.
@@ -116,6 +130,7 @@
       ctx: The aspect evaluation context.
       provider_factory: abstracts the provider returned by this aspect
       namespaces: a list of namespaces licenses must match to be included
+      metadata_providers: a list of other providers of interest
       filter_func: a function that returns true iff the dep edge should be ignored
 
     Returns:
@@ -124,6 +139,8 @@
 
     # First we gather my direct license attachments
     licenses = []
+    other_metadata = []
+    package_info = []
     if ctx.rule.kind == "_license":
         # Don't try to gather licenses from the license rule itself. We'll just
         # blunder into the text file of the license and pick up the default
@@ -144,14 +161,18 @@
                             licenses.append(lic)
                     else:
                         fail("should have a namespace")
-
+                for m_p in metadata_providers:
+                    if m_p in dep:
+                        other_metadata.append(dep[m_p])
 
     # Now gather transitive collection of providers from the targets
     # this target depends upon.
     trans_licenses = []
+    trans_other_metadata = []
+    trans_package_info = []
     trans_deps = []
     traces = []
-    _get_transitive_licenses(ctx, trans_licenses, trans_deps, traces, provider_factory, filter_func)
+    _get_transitive_metadata(ctx, trans_licenses, trans_other_metadata, trans_package_info, trans_deps, traces, provider_factory, filter_func)
 
     if not licenses and not trans_licenses:
         return [provider_factory(deps = depset(), licenses = depset(), traces = [])]
@@ -179,9 +200,22 @@
     else:
         direct_license_uses = None
 
+    # This is a bit of a hack for bazel 5.x.  We can not pass extra fields to
+    # the provider constructor, so we need to do something special for each.
+    # In Bazel 6.x we can use a provider initializer function that would take
+    # all the args and only use the ones it wants.
+    if provider_factory == TransitiveLicensesInfo:
+        return [provider_factory(
+            target_under_license = target.label,
+            licenses = depset(tuple(licenses), transitive = trans_licenses),
+            deps = depset(direct = direct_license_uses, transitive = trans_deps),
+            traces = traces,
+        )]
+
     return [provider_factory(
         target_under_license = target.label,
         licenses = depset(tuple(licenses), transitive = trans_licenses),
+        other_metadata = depset(tuple(other_metadata), transitive = trans_other_metadata),
         deps = depset(direct = direct_license_uses, transitive = trans_deps),
         traces = traces,
     )]
diff --git a/rules/package_info.bzl b/rules/package_info.bzl
new file mode 100644
index 0000000..a8643f8
--- /dev/null
+++ b/rules/package_info.bzl
@@ -0,0 +1,100 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Rules for declaring metadata about a package."""
+
+load(
+    "@rules_license//rules:providers.bzl",
+    "MetadataInfo",
+    "PackageInfo",
+)
+
+#
+# package_info()
+#
+
+def _package_info_impl(ctx):
+    """Returns a PackageInfo and an experimental MetadataInfo for the target."""
+    # Default to the Bazel package; rstrip("/BUILD") strips a character set.
+    package_name = ctx.attr.package_name or ctx.label.package
+    provider = PackageInfo(
+        # Type discriminator: the JSON writer keys on this to choose a formatter.
+        type = "package_info",
+        label = ctx.label,
+        package_name = package_name,
+        package_url = ctx.attr.package_url,
+        package_version = ctx.attr.package_version,
+    )
+    # Experimental alternate design, using a generic 'data' bag to hold things
+    generic_provider = MetadataInfo(
+        type = "package_info_alt",
+        label = ctx.label,
+        data = {
+            "package_name": package_name,
+            "package_url": ctx.attr.package_url,
+            "package_version": ctx.attr.package_version,
+        },
+    )
+    return [provider, generic_provider]
+
+_package_info = rule(
+    implementation = _package_info_impl,
+    attrs = {
+        "package_name": attr.string(
+            doc = "A human readable name identifying this package." +
+                  " This may be used to produce an index of OSS packages used by" +
+                  " an applicatation.",
+        ),
+        "package_url": attr.string(
+            doc = "The URL this instance of the package was download from." +
+                  " This may be used to produce an index of OSS packages used by" +
+                  " an applicatation.",
+        ),
+        "package_version": attr.string(
+            doc = "A human readable version string identifying this package." +
+                  " This may be used to produce an index of OSS packages used" +
+                  " by an applicatation.  It should be a value that" +
+                  " increases over time, rather than a commit hash."
+        ),
+    },
+)
+
+# buildifier: disable=function-docstring-args
+def package_info(
+        name,
+        package_name = None,
+        package_url = None,
+        package_version = None,
+        visibility = ["//visibility:public"]):
+    """Wrapper for package_info rule.
+
+    Args:
+      name: str target name.
+      package_name : str A human readable name identifying this package. This
+                     may be used to produce an index of OSS packages used by
+                     an application.
+      package_url: str The canonical URL this package distribution was retrieved from.
+                       Note that, because of local mirroring, that might not be the
+                       physical URL it was retrieved from.
+      package_version: str A human readable name identifying version of this package.
+    """
+    _package_info(
+        name = name,
+        package_name = package_name,
+        package_url = package_url,
+        package_version = package_version,
+        applicable_licenses = [],
+        visibility = visibility,
+        tags = [],
+        testonly = 0,
+    )
diff --git a/rules/providers.bzl b/rules/providers.bzl
index 8778fd7..3b1f090 100644
--- a/rules/providers.bzl
+++ b/rules/providers.bzl
@@ -59,3 +59,41 @@
 
 # This provider is used by the aspect that is used by manifest() rules.
 TransitiveLicensesInfo = licenses_info()
+
+# This is one way to specify data
+PackageInfo = provider(
+    doc = """Provides information about a package.""",
+    fields = {
+        "type": "string: How to interpret data",
+        "label": "Label: label of the package_info rule",
+        "package_name": "string: Human readable package name",
+        "package_url": "string: URL from which this package was downloaded.",
+        "package_version": "string: Human readable version string",
+    },
+)
+
+# This is more extensible. Because of the provider implementation, having a big
+# dict of values rather than named fields is not much more costly.
+# Design choice.  Replace data with actual providers, such as PackageInfo
+MetadataInfo = provider(
+    doc = """Generic bag of metadata.""",
+    fields = {
+        "type": "string: How to interpret data",
+        "label": "Label: label of the metadata rule",
+        "data": "String->any: Map of names to values",
+    }
+)
+
+TransitiveMetadataInfo = provider(
+    doc = """The transitive set of licenses used by a target.""",
+    fields = {
+        "top_level_target": "Label: The top level target label we are examining.",
+        "other_metadata": "depset(MetatdataInfo)",
+        "licenses": "depset(LicenseInfo)",
+        "package_info": "depset(PackageInfo)",
+
+        "target_under_license": "Label: A target which will be associated with some licenses.",
+        "deps": "depset(LicensedTargetInfo): The transitive list of dependencies that have licenses.",
+        "traces": "list(string) - diagnostic for tracing a dependency relationship to a target.",
+    },
+)
diff --git a/rules/sbom.bzl b/rules/sbom.bzl
new file mode 100644
index 0000000..fb17adc
--- /dev/null
+++ b/rules/sbom.bzl
@@ -0,0 +1,159 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""SBOM generation"""
+
+load(
+    "@rules_license//rules:gather_metadata.bzl",
+    "gather_metadata_info",
+    "gather_metadata_info_and_write",
+    "write_metadata_info",
+)
+load(
+    "@rules_license//rules:providers.bzl",
+    "TransitiveLicensesInfo",
+)
+
+# This rule is proof of concept, and may not represent the final
+# form of a rule for compliance validation.
+def _generate_sbom_impl(ctx):
+    # Gather all licenses and write information to one place
+
+    licenses_file = ctx.actions.declare_file("_%s_licenses_info.json" % ctx.label.name)
+    write_metadata_info(ctx, ctx.attr.deps, licenses_file)
+
+    license_files = []
+    # if ctx.outputs.license_texts:
+    #     license_files = get_licenses_mapping(ctx.attr.deps).keys()
+
+    # Now turn the big blob of data into something consumable.
+    inputs = [licenses_file]
+    outputs = [ctx.outputs.out]
+    args = ctx.actions.args()
+    args.add("--licenses_info", licenses_file.path)
+    args.add("--out", ctx.outputs.out.path)
+    ctx.actions.run(
+        mnemonic = "CreateSBOM",
+        progress_message = "Creating SBOM for %s" % ctx.label,
+        inputs = inputs,
+        outputs = outputs,
+        executable = ctx.executable._sbom_generator,
+        arguments = [args],
+    )
+    outputs.append(licenses_file)  # also make the json file available.
+    return [DefaultInfo(files = depset(outputs))]
+
+_generate_sbom = rule(
+    implementation = _generate_sbom_impl,
+    attrs = {
+        "deps": attr.label_list(
+            aspects = [gather_metadata_info],
+        ),
+        "out": attr.output(mandatory = True),
+        "_sbom_generator": attr.label(
+            default = Label("@rules_license//tools:write_sbom"),
+            executable = True,
+            allow_files = True,
+            cfg = "exec",
+        ),
+    },
+)
+
+def generate_sbom(**kwargs):
+    _generate_sbom(**kwargs)
+
+def _manifest_impl(ctx):
+    # Gather all licenses and make it available as deps for downstream rules
+    # Additionally write the list of license filenames to a file that can
+    # also be used as an input to downstream rules.
+    licenses_file = ctx.actions.declare_file(ctx.attr.out.name)
+    mappings = get_licenses_mapping(ctx.attr.deps, ctx.attr.warn_on_legacy_licenses)
+    ctx.actions.write(
+        output = licenses_file,
+        content = "\n".join([",".join([f.path, p]) for (f, p) in mappings.items()]),
+    )
+    return [DefaultInfo(files = depset(mappings.keys()))]
+
+_manifest = rule(
+    implementation = _manifest_impl,
+    doc = """Internal tmplementation method for manifest().""",
+    attrs = {
+        "deps": attr.label_list(
+            doc = """List of targets to collect license files for.""",
+            aspects = [gather_metadata_info],
+        ),
+        "out": attr.output(
+            doc = """Output file.""",
+            mandatory = True,
+        ),
+        "warn_on_legacy_licenses": attr.bool(default = False),
+    },
+)
+
+def manifest(name, deps, out = None, **kwargs):
+    if not out:
+        out = name + ".manifest"
+
+    _manifest(name = name, deps = deps, out = out, **kwargs)
+
+def _licenses_used_impl(ctx):
+    # Gather all licenses and make it available as JSON
+    write_metadata_info(ctx, ctx.attr.deps, ctx.outputs.out)
+    return [DefaultInfo(files = depset([ctx.outputs.out]))]
+
+_licenses_used = rule(
+    implementation = _licenses_used_impl,
+    doc = """Internal tmplementation method for licenses_used().""",
+    attrs = {
+        "deps": attr.label_list(
+            doc = """List of targets to collect LicenseInfo for.""",
+            aspects = [gather_metadata_info_and_write],
+        ),
+        "out": attr.output(
+            doc = """Output file.""",
+            mandatory = True,
+        ),
+    },
+)
+
+def get_licenses_mapping(deps, warn = False):
+    """Creates a dict mapping each license text File to its package name.
+
+    Args:
+
+      deps: a list of deps which should have TransitiveLicensesInfo providers.
+            This requires that you have run the gather_licenses_info
+            aspect over them
+            NOTE(review): _manifest attaches gather_metadata_info - confirm.
+      warn: boolean, if true, display output about legacy targets that need
+            update
+
+    Returns:
+      {File:package_name}
+    """
+    tls = []
+    for dep in deps:
+        lds = dep[TransitiveLicensesInfo].licenses
+        tls.append(lds)
+
+    ds = depset(transitive = tls)
+
+    # Ignore any legacy licenses that may be in the report
+    mappings = {}
+    for lic in ds.to_list():
+        if type(lic.license_text) == "File":
+            mappings[lic.license_text] = lic.package_name
+        elif warn:
+            print("Legacy license %s not included, rule needs updating" % lic.license_text)
+
+    return mappings
diff --git a/tools/BUILD b/tools/BUILD
index 9be1c2d..2b56a34 100644
--- a/tools/BUILD
+++ b/tools/BUILD
@@ -15,12 +15,20 @@
 """License declaration and compliance checking tools."""
 
 package(
-    default_applicable_licenses = ["//:license"],
+    default_applicable_licenses = ["//:license", "//:package_info"],
     default_visibility = ["//visibility:public"],
 )
 
 licenses(["notice"])
 
+filegroup(
+    name = "standard_package",
+    srcs = glob(["**"]),
+    visibility = ["//distro:__pkg__"],
+)
+
+exports_files(["diff_test.sh"])
+
 py_binary(
     name = "checker_demo",
     srcs = ["checker_demo.py"],
@@ -28,10 +36,9 @@
     visibility = ["//visibility:public"],
 )
 
-exports_files(["diff_test.sh"])
-
-filegroup(
-    name = "standard_package",
-    srcs = glob(["**"]),
-    visibility = ["//distro:__pkg__"],
+py_binary(
+    name = "write_sbom",
+    srcs = ["write_sbom.py"],
+    python_version = "PY3",
+    visibility = ["//visibility:public"],
 )
diff --git a/tools/write_sbom.py b/tools/write_sbom.py
new file mode 100644
index 0000000..18286ab
--- /dev/null
+++ b/tools/write_sbom.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Proof of concept SBOM writer.
+
+This is only a demonstration. It will be replaced with other tools.
+"""
+
+import argparse
+import codecs
+import datetime
+import json
+import os
+
+
+TOOL = 'https://github.com/bazelbuild/rules_license/tools:write_sbom'
+
+def _load_package_data(package_info):  # package_info: path of a UTF-8 JSON file
+  with codecs.open(package_info, encoding='utf-8') as json_file:
+    return json.load(json_file)
+
+def _write_sbom_header(out, package):  # Writes the SPDX document header to `out`.
+  try:
+    creator = os.getlogin()
+  except OSError:  # No controlling terminal, e.g. in a container or on CI.
+    creator = 'unknown'
+  out.write('\n'.join([
+    'SPDXVersion: SPDX-2.2',
+    'DataLicense: CC0-1.0',
+    'SPDXID: SPDXRef-DOCUMENT',
+    'DocumentName: %s' % package,
+    # TBD: 'DocumentNamespace: https://swinslow.net/spdx-examples/example1/hello-v3
+    'Creator: Person: %s' % creator,
+    'Creator: Tool: %s' % TOOL,
+    datetime.datetime.utcnow().strftime('Created: %Y-%m-%d-%H:%M:%SZ'),
+    '',
+    '##### Package: %s' % package,
+  ]))
+
+def _write_sbom(out, packages):
+  """Emit one SBOM stanza per package.
+
+  Args:
+    out: writable text file object
+    packages: iterable of dicts of package metadata parsed from JSON
+  """
+  for package in packages:
+    name = package.get('package_name') or '<unknown>'
+    out.write('\n')
+    out.write('SPDXID: "%s"\n' % name)
+    out.write('  name: "%s"\n' % name)
+    # IGNORE_COPYRIGHT: Not a copyright notice. It is a variable holding one.
+    for key, template in (('package_version', '  versionInfo: "%s"\n'),
+                          ('copyright_notice', '  copyrightText: "%s"\n')):
+      if package.get(key):
+        out.write(template % package[key])
+    license_kinds = package.get('license_kinds')
+    if license_kinds:
+      declared = ','.join([kind['name'] for kind in license_kinds])
+      out.write('  licenseDeclared: "%s"\n' % declared)
+    download_url = package.get('package_url')
+    if download_url:
+      out.write('  downloadLocation: %s\n' % download_url)
+
+
+def main():
+  """Reads the gathered license/package JSON and writes an SBOM; returns 0."""
+  parser = argparse.ArgumentParser(description='Demonstration SBOM writer')
+
+  # Required: without it we would try to open None and die with a TypeError.
+  parser.add_argument('--licenses_info', required=True,
+                      help='path to JSON file containing all license data')
+  parser.add_argument('--out', default='sbom.out', help='SBOM output')
+  args = parser.parse_args()
+
+  license_data = _load_package_data(args.licenses_info)
+  target = license_data[0]  # we assume only one target for the demo
+
+  top_level_target = target['top_level_target']
+  # It's not really packages, but this is close proxy for now
+  licenses = target['licenses']
+  package_infos = target['packages']
+
+  # These are similar dicts, so merge them by package. This is not
+  # strictly true, as different licenses can appear in the same
+  # package, but it is good enough for demonstrating the sbom.
+  # Keyed by Bazel package; package_info fields override license fields.
+  merged = {x['bazel_package']: x for x in licenses}
+  for pi in package_infos:
+    p = merged.get(pi['bazel_package'])
+    if p:
+      p.update(pi)
+    else:
+      merged[pi['bazel_package']] = pi
+
+  err = 0
+  with codecs.open(args.out, mode='w', encoding='utf-8') as out:
+    _write_sbom_header(out, package=top_level_target)
+    _write_sbom(out, merged.values())
+  return err
+
+
+if __name__ == '__main__':
+  raise SystemExit(main())  # propagate main()'s return value as the exit status