Add build.json and generator script
This commit introduces a new script, `manage_build_json.py`, which
queries Bazel to determine the source files for the Emboss compiler
and C++ runtime.
The script generates a `build.json` file in the project root, which
will serve as the source of truth for generating build helper files
for other build systems.
The script also supports a `--validate` mode, which verifies that
`build.json` is in sync with the Bazel build targets.
diff --git a/build.json b/build.json
new file mode 100644
index 0000000..4576ad1
--- /dev/null
+++ b/build.json
@@ -0,0 +1,68 @@
+{
+ // A list of all source files required to build the Emboss compiler.
+ "embossc_sources": [
+ "compiler/__init__.py",
+ "compiler/back_end/__init__.py",
+ "compiler/back_end/cpp/__init__.py",
+ "compiler/back_end/cpp/attributes.py",
+ "compiler/back_end/cpp/emboss_codegen_cpp.py",
+ "compiler/back_end/cpp/generated_code_templates",
+ "compiler/back_end/cpp/header_generator.py",
+ "compiler/back_end/util/__init__.py",
+ "compiler/back_end/util/code_template.py",
+ "compiler/front_end/__init__.py",
+ "compiler/front_end/attribute_checker.py",
+ "compiler/front_end/attributes.py",
+ "compiler/front_end/constraints.py",
+ "compiler/front_end/dependency_checker.py",
+ "compiler/front_end/emboss_front_end.py",
+ "compiler/front_end/error_examples",
+ "compiler/front_end/expression_bounds.py",
+ "compiler/front_end/generated/cached_parser.py",
+ "compiler/front_end/glue.py",
+ "compiler/front_end/lr1.py",
+ "compiler/front_end/make_parser.py",
+ "compiler/front_end/module_ir.py",
+ "compiler/front_end/parser.py",
+ "compiler/front_end/prelude.emb",
+ "compiler/front_end/reserved_words",
+ "compiler/front_end/symbol_resolver.py",
+ "compiler/front_end/synthetics.py",
+ "compiler/front_end/tokenizer.py",
+ "compiler/front_end/type_check.py",
+ "compiler/front_end/write_inference.py",
+ "compiler/util/__init__.py",
+ "compiler/util/attribute_util.py",
+ "compiler/util/error.py",
+ "compiler/util/expression_parser.py",
+ "compiler/util/ir_data.py",
+ "compiler/util/ir_data_fields.py",
+ "compiler/util/ir_data_utils.py",
+ "compiler/util/ir_util.py",
+ "compiler/util/name_conversion.py",
+ "compiler/util/parser_types.py",
+ "compiler/util/parser_util.py",
+ "compiler/util/resources.py",
+ "compiler/util/simple_memoizer.py",
+ "compiler/util/traverse_ir.py",
+ "embossc"
+ ],
+ // A list of all source files required for the Emboss C++ runtime.
+ "emboss_runtime_cpp_sources": [
+ "runtime/cpp/emboss_arithmetic.h",
+ "runtime/cpp/emboss_arithmetic_all_known_generated.h",
+ "runtime/cpp/emboss_arithmetic_maximum_operation_generated.h",
+ "runtime/cpp/emboss_array_view.h",
+ "runtime/cpp/emboss_bit_util.h",
+ "runtime/cpp/emboss_constant_view.h",
+ "runtime/cpp/emboss_cpp_types.h",
+ "runtime/cpp/emboss_cpp_util.h",
+ "runtime/cpp/emboss_defines.h",
+ "runtime/cpp/emboss_enum_view.h",
+ "runtime/cpp/emboss_maybe.h",
+ "runtime/cpp/emboss_memory_util.h",
+ "runtime/cpp/emboss_prelude.h",
+ "runtime/cpp/emboss_text_util.h",
+ "runtime/cpp/emboss_view_parameters.h"
+ ]
+}
diff --git a/scripts/build_helpers/manage_build_json.py b/scripts/build_helpers/manage_build_json.py
new file mode 100755
index 0000000..3d356c1
--- /dev/null
+++ b/scripts/build_helpers/manage_build_json.py
@@ -0,0 +1,193 @@
+#!/usr/bin/env python3
+
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Creates and validates the `build.json` file."""
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+
+# Bazel targets whose transitive source files make up the embossc compiler.
+EMBOSSC_TARGETS = [
+ "//compiler/back_end/cpp:emboss_codegen_cpp",
+ "//compiler/front_end:emboss_front_end",
+]
+
+# Bazel target whose transitive source files make up the C++ runtime.
+RUNTIME_CPP_TARGET = "//runtime/cpp:cpp_utils"
+
+
+def get_bazel_query_output(query):
+    """Runs `bazel query <query>` and returns its stdout as a list of lines.
+
+    Raises subprocess.CalledProcessError if Bazel exits with a non-zero status.
+    """
+    command = ["bazel", "query", query]
+    result = subprocess.run(command, capture_output=True, text=True, check=True)
+    # splitlines() yields [] for empty output, where split("\n") gave [""].
+    return result.stdout.strip().splitlines()
+
+
+def get_source_files_for_targets(targets):
+    """Returns sorted workspace-relative paths of the targets' source files.
+
+    Queries the transitive deps of `targets`, keeps only labels that start with
+    `//` and are not under `//external`, and maps `//pkg:file` to `pkg/file`.
+    """
+    query = f"kind('source file', deps({'+'.join(targets)}))"
+    labels = [s for s in get_bazel_query_output(query) if s.startswith("//")]
+    kept = [s for s in labels if not s.startswith(("//external:", "//external/"))]
+    # lstrip("/") keeps root-package labels (`//:file`) relative: `file`.
+    return sorted(s[2:].replace(":", "/").lstrip("/") for s in kept)
+
+
+def read_build_json(file_path):
+    """Parses build.json, ignoring whole-line `//` comments, and returns its data."""
+    with open(file_path, "r") as json_file:
+        kept = [row for row in json_file if not row.lstrip().startswith("//")]
+    return json.loads("".join(kept))
+
+
+def find_init_py_files(source_files):
+    """Returns the sorted `__init__.py` paths beside or above the given sources.
+
+    Every directory on the way up from each source file is probed once; paths
+    are used as given, so relative ones resolve against the working directory.
+    """
+    found, visited = set(), set()
+    for source_path in source_files:
+        current = os.path.dirname(source_path)
+        # A visited directory already had all of its ancestors walked.
+        while current and current not in visited:
+            visited.add(current)
+            candidate = os.path.join(current, "__init__.py")
+            if os.path.exists(candidate):
+                found.add(candidate)
+            parent = os.path.dirname(current)
+            # dirname() stops shortening at the filesystem root; end the walk.
+            current = "" if parent == current else parent
+    return sorted(found)
+
+
+def generate_build_json(output_path):
+    """Writes a freshly generated, commented build.json to `output_path`.
+
+    The lists come from Bazel queries; the compiler list also gets the
+    `embossc` entry and any `__init__.py` files found for its sources.
+    Always returns 0, which `main` passes on as the exit status.
+    """
+    print("Generating fresh source lists from Bazel...")
+    compiler_files = get_source_files_for_targets(EMBOSSC_TARGETS)
+    compiler_files.append("embossc")
+    compiler_files = sorted({*compiler_files, *find_init_py_files(compiler_files)})
+    runtime_files = get_source_files_for_targets([RUNTIME_CPP_TARGET])
+
+    # Key -> comment line to place directly above that key in the output.
+    key_comments = {
+        "embossc_sources": "// A list of all source files required to build the Emboss compiler.",
+        "emboss_runtime_cpp_sources": "// A list of all source files required for the Emboss C++ runtime.",
+    }
+
+    rendered = json.dumps(
+        {"embossc_sources": compiler_files, "emboss_runtime_cpp_sources": runtime_files},
+        indent=2,
+    )
+    # json.dumps cannot emit comments, so they are spliced into its output.
+    for key, comment in key_comments.items():
+        rendered = rendered.replace(f'"{key}":', f'{comment}\n "{key}":')
+
+    with open(output_path, "w") as out_file:
+        out_file.write(rendered + "\n")
+
+    print(f"Successfully generated {output_path}")
+    return 0
+
+
+def validate_build_json(file_path):
+    """Validates that the on-disk build.json file is up-to-date.
+
+    Regenerates both source lists from Bazel and compares them, as sets, with
+    the lists stored in `file_path`; ordering and duplicates are not checked.
+
+    Returns:
+        0 if the lists match; otherwise 1, after printing the added (`+`) and
+        removed (`-`) entries for each list to stderr.
+    """
+    print("Generating fresh source lists from Bazel for validation...")
+    fresh_embossc_sources = get_source_files_for_targets(EMBOSSC_TARGETS)
+    # Same post-processing as generation: add `embossc` and __init__.py files.
+    fresh_embossc_sources.append("embossc")
+    init_py_sources = find_init_py_files(fresh_embossc_sources)
+    fresh_embossc = set(fresh_embossc_sources) | set(init_py_sources)
+    fresh_runtime_cpp = set(get_source_files_for_targets([RUNTIME_CPP_TARGET]))
+
+    print(f"Reading existing sources from {file_path}...")
+    on_disk_data = read_build_json(file_path)
+    on_disk_embossc = set(on_disk_data.get("embossc_sources", []))
+    on_disk_runtime_cpp = set(on_disk_data.get("emboss_runtime_cpp_sources", []))
+
+    if fresh_embossc == on_disk_embossc and fresh_runtime_cpp == on_disk_runtime_cpp:
+        print("build.json is up-to-date.")
+        return 0
+
+    print("\nERROR: build.json is out of date!", file=sys.stderr)
+
+    def print_diff(title, fresh_set, on_disk_set):
+        """Prints entries only in `fresh_set` (+) or only in `on_disk_set` (-)."""
+        added = sorted(fresh_set - on_disk_set)
+        removed = sorted(on_disk_set - fresh_set)
+        if added or removed:
+            print(f" {title}:", file=sys.stderr)
+            for f in added:
+                print(f" + {f}", file=sys.stderr)
+            for f in removed:
+                print(f" - {f}", file=sys.stderr)
+
+    # Report against the same sets that were compared above. Diffing the list
+    # without the discovered __init__.py files would wrongly show every
+    # __init__.py in build.json as removed.
+    print_diff("embossc_sources", fresh_embossc, on_disk_embossc)
+    print_diff("emboss_runtime_cpp_sources", fresh_runtime_cpp, on_disk_runtime_cpp)
+
+    print(
+        "\nPlease run 'scripts/build_helpers/manage_build_json.py' to update.",
+        file=sys.stderr,
+    )
+    return 1
+
+
+def main():
+    """Parses the command line, then regenerates or validates build.json.
+
+    Returns the status code of the selected action, for use with sys.exit().
+    """
+    parser = argparse.ArgumentParser(description="Manage the build.json file.")
+    validate_help = "Validate that build.json is up-to-date."
+    parser.add_argument("--validate", action="store_true", help=validate_help)
+    args = parser.parse_args()
+    # `bazel run` starts the script in its runfiles tree and reports the real
+    # workspace in BUILD_WORKSPACE_DIRECTORY; work from there so the Bazel
+    # queries and relative __init__.py lookups see the same tree as build.json.
+    workspace_root = os.environ.get("BUILD_WORKSPACE_DIRECTORY", os.getcwd())
+    os.chdir(workspace_root)
+    action = validate_build_json if args.validate else generate_build_json
+    return action(os.path.join(workspace_root, "build.json"))
+
+
+if __name__ == "__main__":
+ sys.exit(main())  # exit status: 0 on success, 1 when validation fails