feat(runfiles): support for --incompatible_compact_repo_mapping_manifest (#3277)

Under bzlmod, the repo mapping can become quite large (i.e. tens of
megabytes)
because its size scales as a factor of the number of repos in the
transitive
dependencies.

To address this, the --incompatible_compact_repo_mapping_manifest flag
was introduced.
This changes the repo mapping formation to use prefixes (instead of
exact repo names)
for mapping things.

To make this work with the runfiles library, the code has to be updated
to handle
these prefixes instead of just exact strings.

Fixes #3022.

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Richard Levasseur <richardlev@gmail.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3d06a68..469e9d3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -85,6 +85,8 @@
 
 {#v0-0-0-added}
 ### Added
+* (runfiles) The Python runfiles library now supports Bazel's
+  `--incompatible_compact_repo_mapping_manifest` flag.
 * (bootstrap) {obj}`--bootstrap_impl=system_python` now supports the
   {obj}`main_module` attribute.
 * (bootstrap) {obj}`--bootstrap_impl=system_python` now supports the
diff --git a/python/runfiles/runfiles.py b/python/runfiles/runfiles.py
index 3943be5..58f59c5 100644
--- a/python/runfiles/runfiles.py
+++ b/python/runfiles/runfiles.py
@@ -15,14 +15,131 @@
 """Runfiles lookup library for Bazel-built Python binaries and tests.
 
 See @rules_python//python/runfiles/README.md for usage instructions.
+
+:::{versionadded} VERSION_NEXT_FEATURE
+Support for Bazel's `--incompatible_compact_repo_mapping_manifest` flag was added.
+This enables prefix-based repository mappings to reduce memory usage for large
+dependency graphs under bzlmod.
+:::
 """
+import collections.abc
 import inspect
 import os
 import posixpath
 import sys
+from collections import defaultdict
 from typing import Dict, Optional, Tuple, Union
 
 
+class _RepositoryMapping:
+    """Repository mapping for resolving apparent repository names to canonical ones.
+
+    Handles both exact mappings and prefix-based mappings introduced by the
+    --incompatible_compact_repo_mapping_manifest flag.
+    """
+
+    def __init__(
+        self,
+        exact_mappings: Dict[Tuple[str, str], str],
+        prefixed_mappings: Dict[Tuple[str, str], str],
+    ) -> None:
+        """Initialize repository mapping with exact and prefixed mappings.
+
+        Args:
+            exact_mappings: Dict mapping (source_canonical, target_apparent) -> target_canonical
+            prefixed_mappings: Dict mapping (source_prefix, target_apparent) -> target_canonical
+        """
+        self._exact_mappings = exact_mappings
+
+        # Group prefixed mappings by target_apparent for faster lookups
+        self._grouped_prefixed_mappings = defaultdict(list)
+        for (
+            prefix_source,
+            target_app,
+        ), target_canonical in prefixed_mappings.items():
+            self._grouped_prefixed_mappings[target_app].append(
+                (prefix_source, target_canonical)
+            )
+
+    @staticmethod
+    def create_from_file(repo_mapping_path: Optional[str]) -> "_RepositoryMapping":
+        """Create RepositoryMapping from a repository mapping manifest file.
+
+        Args:
+            repo_mapping_path: Path to the repository mapping file, or None if not available
+
+        Returns:
+            RepositoryMapping instance with parsed mappings
+        """
+        # If the repository mapping file can't be found, that is not an error: We
+        # might be running without Bzlmod enabled or there may not be any runfiles.
+        # In this case, just apply empty repo mappings.
+        if not repo_mapping_path:
+            return _RepositoryMapping({}, {})
+
+        try:
+            with open(repo_mapping_path, "r", encoding="utf-8", newline="\n") as f:
+                content = f.read()
+        except FileNotFoundError:
+            return _RepositoryMapping({}, {})
+
+        exact_mappings = {}
+        prefixed_mappings = {}
+        for line in content.splitlines():
+            source_canonical, target_apparent, target_canonical = line.split(",")
+            if source_canonical.endswith("*"):
+                # This is a prefixed mapping - remove the '*' for prefix matching
+                prefix = source_canonical[:-1]
+                prefixed_mappings[(prefix, target_apparent)] = target_canonical
+            else:
+                # This is an exact mapping
+                exact_mappings[(source_canonical, target_apparent)] = target_canonical
+
+        return _RepositoryMapping(exact_mappings, prefixed_mappings)
+
+    def lookup(self, source_repo: Optional[str], target_apparent: str) -> Optional[str]:
+        """Look up repository mapping for the given source and target.
+
+        This handles both exact mappings and prefix-based mappings introduced by the
+        --incompatible_compact_repo_mapping_manifest flag. Exact mappings are tried
+        first, followed by prefix-based mappings where order matters.
+
+        Args:
+            source_repo: Source canonical repository name
+            target_apparent: Target apparent repository name
+
+        Returns:
+            target_canonical repository name, or None if no mapping exists
+        """
+        if source_repo is None:
+            return None
+
+        key = (source_repo, target_apparent)
+
+        # Try exact mapping first
+        if key in self._exact_mappings:
+            return self._exact_mappings[key]
+
+        # Try prefixed mapping if no exact match found
+        if target_apparent in self._grouped_prefixed_mappings:
+            for prefix_source, target_canonical in self._grouped_prefixed_mappings[
+                target_apparent
+            ]:
+                if source_repo.startswith(prefix_source):
+                    return target_canonical
+
+        # No mapping found
+        return None
+
+    def is_empty(self) -> bool:
+        """Check if this repository mapping is empty (no exact or prefixed mappings).
+
+        Returns:
+            True if there are no mappings, False otherwise
+        """
+        return len(self._exact_mappings) == 0 and len(self._grouped_prefixed_mappings) == 0
+
+
 class _ManifestBased:
     """`Runfiles` strategy that parses a runfiles-manifest to look up runfiles."""
 
@@ -130,7 +247,7 @@
     def __init__(self, strategy: Union[_ManifestBased, _DirectoryBased]) -> None:
         self._strategy = strategy
         self._python_runfiles_root = _FindPythonRunfilesRoot()
-        self._repo_mapping = _ParseRepoMapping(
+        self._repo_mapping = _RepositoryMapping.create_from_file(
             strategy.RlocationChecked("_repo_mapping")
         )
 
@@ -179,7 +296,7 @@
         if os.path.isabs(path):
             return path
 
-        if source_repo is None and self._repo_mapping:
+        if source_repo is None and not self._repo_mapping.is_empty():
             # Look up runfiles using the repository mapping of the caller of the
             # current method. If the repo mapping is empty, determining this
             # name is not necessary.
@@ -188,7 +305,8 @@
         # Split off the first path component, which contains the repository
         # name (apparent or canonical).
         target_repo, _, remainder = path.partition("/")
-        if not remainder or (source_repo, target_repo) not in self._repo_mapping:
+        target_canonical = self._repo_mapping.lookup(source_repo, target_repo)
+        if not remainder or target_canonical is None:
             # One of the following is the case:
             # - not using Bzlmod, so the repository mapping is empty and
             #   apparent and canonical repository names are the same
@@ -202,11 +320,15 @@
             source_repo is not None
         ), "BUG: if the `source_repo` is None, we should never go past the `if` statement above"
 
-        # target_repo is an apparent repository name. Look up the corresponding
-        # canonical repository name with respect to the current repository,
-        # identified by its canonical name.
-        target_canonical = self._repo_mapping[(source_repo, target_repo)]
-        return self._strategy.RlocationChecked(target_canonical + "/" + remainder)
+        # Look up the target repository using the repository mapping
+        if target_canonical is not None:
+            return self._strategy.RlocationChecked(
+                target_canonical + "/" + remainder
+            )
+
+        # No mapping found - assume target_repo is already canonical or
+        # we're not using Bzlmod
+        return self._strategy.RlocationChecked(path)
 
     def EnvVars(self) -> Dict[str, str]:
         """Returns environment variables for subprocesses.
@@ -359,30 +481,6 @@
     return root
 
 
-def _ParseRepoMapping(repo_mapping_path: Optional[str]) -> Dict[Tuple[str, str], str]:
-    """Parses the repository mapping manifest."""
-    # If the repository mapping file can't be found, that is not an error: We
-    # might be running without Bzlmod enabled or there may not be any runfiles.
-    # In this case, just apply an empty repo mapping.
-    if not repo_mapping_path:
-        return {}
-    try:
-        with open(repo_mapping_path, "r", encoding="utf-8", newline="\n") as f:
-            content = f.read()
-    except FileNotFoundError:
-        return {}
-
-    repo_mapping = {}
-    for line in content.split("\n"):
-        if not line:
-            # Empty line following the last line break
-            break
-        current_canonical, target_local, target_canonical = line.split(",")
-        repo_mapping[(current_canonical, target_local)] = target_canonical
-
-    return repo_mapping
-
-
 def CreateManifestBased(manifest_path: str) -> Runfiles:
     return Runfiles.CreateManifestBased(manifest_path)
 
diff --git a/tests/runfiles/runfiles_test.py b/tests/runfiles/runfiles_test.py
index a3837ac..b8a3d5f 100644
--- a/tests/runfiles/runfiles_test.py
+++ b/tests/runfiles/runfiles_test.py
@@ -18,6 +18,7 @@
 from typing import Any, List, Optional
 
 from python.runfiles import runfiles
+from python.runfiles.runfiles import _RepositoryMapping
 
 
 class RunfilesTest(unittest.TestCase):
@@ -525,6 +526,165 @@
                 r.Rlocation("config.json", "protobuf~3.19.2"), dir + "/config.json"
             )
 
+    def testDirectoryBasedRlocationWithCompactRepoMappingFromMain(self) -> None:
+        """Test repository mapping with prefix-based entries (compact format)."""
+        with _MockFile(
+            name="_repo_mapping",
+            contents=[
+                # Exact mappings (no asterisk)
+                "_,config.json,config.json~1.2.3",
+                ",my_module,_main",
+                ",my_workspace,_main",
+                # Prefixed mappings (with asterisk) - these apply to any repo starting with the prefix
+                "deps+*,external_dep,external_dep~1.0.0",
+                "test_deps+*,test_lib,test_lib~2.1.0",
+            ],
+        ) as rm:
+            dir = os.path.dirname(rm.Path())
+            r = runfiles.CreateDirectoryBased(dir)
+
+            # Test exact mappings still work
+            self.assertEqual(
+                r.Rlocation("my_module/bar/runfile", ""), dir + "/_main/bar/runfile"
+            )
+            self.assertEqual(
+                r.Rlocation("my_workspace/bar/runfile", ""), dir + "/_main/bar/runfile"
+            )
+
+            # Test prefixed mappings - should match any repo starting with "deps+"
+            self.assertEqual(
+                r.Rlocation("external_dep/foo/file", "deps+dep1"),
+                dir + "/external_dep~1.0.0/foo/file",
+            )
+            self.assertEqual(
+                r.Rlocation("external_dep/bar/file", "deps+dep2"),
+                dir + "/external_dep~1.0.0/bar/file",
+            )
+            self.assertEqual(
+                r.Rlocation("external_dep/nested/path/file", "deps+some_long_dep_name"),
+                dir + "/external_dep~1.0.0/nested/path/file",
+            )
+
+            # Test that prefixed mappings work for test_deps+ prefix too
+            self.assertEqual(
+                r.Rlocation("test_lib/test/file", "test_deps+junit"),
+                dir + "/test_lib~2.1.0/test/file",
+            )
+
+            # Test that non-matching prefixes don't match
+            self.assertEqual(
+                r.Rlocation("external_dep/foo/file", "other_prefix"),
+                dir + "/external_dep/foo/file",  # No mapping applied, use as-is
+            )
+
+    def testDirectoryBasedRlocationWithCompactRepoMappingPrecedence(self) -> None:
+        """Test that exact mappings take precedence over prefixed mappings."""
+        with _MockFile(
+            name="_repo_mapping",
+            contents=[
+                # Exact mapping for a specific source repo
+                "deps+specific_repo,external_dep,external_dep~exact",
+                # Prefixed mapping for repos starting with "deps+"
+                "deps+*,external_dep,external_dep~prefix",
+                # Another prefixed mapping with different prefix
+                "other+*,external_dep,external_dep~other",
+            ],
+        ) as rm:
+            dir = os.path.dirname(rm.Path())
+            r = runfiles.CreateDirectoryBased(dir)
+
+            # Exact mapping should take precedence over prefix
+            self.assertEqual(
+                r.Rlocation("external_dep/foo/file", "deps+specific_repo"),
+                dir + "/external_dep~exact/foo/file",
+            )
+
+            # Other repos with deps+ prefix should use the prefixed mapping
+            self.assertEqual(
+                r.Rlocation("external_dep/foo/file", "deps+other_repo"),
+                dir + "/external_dep~prefix/foo/file",
+            )
+
+            # Different prefix should use its own mapping
+            self.assertEqual(
+                r.Rlocation("external_dep/foo/file", "other+some_repo"),
+                dir + "/external_dep~other/foo/file",
+            )
+
+    def testDirectoryBasedRlocationWithCompactRepoMappingOrderMatters(self) -> None:
+        """Test that order matters for prefixed mappings (first match wins)."""
+        with _MockFile(
+            name="_repo_mapping",
+            contents=[
+                # More specific prefix comes first
+                "deps+specific+*,lib,lib~specific",
+                # More general prefix comes second
+                "deps+*,lib,lib~general",
+            ],
+        ) as rm:
+            dir = os.path.dirname(rm.Path())
+            r = runfiles.CreateDirectoryBased(dir)
+
+            # Should match the more specific prefix first
+            self.assertEqual(
+                r.Rlocation("lib/foo/file", "deps+specific+repo"),
+                dir + "/lib~specific/foo/file",
+            )
+
+            # Should match the general prefix for non-specific repos
+            self.assertEqual(
+                r.Rlocation("lib/foo/file", "deps+other_repo"),
+                dir + "/lib~general/foo/file",
+            )
+
+    def testRepositoryMappingLookup(self) -> None:
+        """Test _RepositoryMapping.lookup() method for both exact and prefix-based mappings."""
+        exact_mappings = {
+            ("", "my_workspace"): "_main",
+            ("", "config_lib"): "config_lib~1.0.0",
+            ("deps+specific_repo", "external_dep"): "external_dep~exact",
+        }
+        prefixed_mappings = {
+            ("deps+", "external_dep"): "external_dep~prefix",
+            ("test_deps+", "test_lib"): "test_lib~2.1.0",
+        }
+
+        repo_mapping = _RepositoryMapping(exact_mappings, prefixed_mappings)
+
+        # Test exact lookups
+        self.assertEqual(repo_mapping.lookup("", "my_workspace"), "_main")
+        self.assertEqual(repo_mapping.lookup("", "config_lib"), "config_lib~1.0.0")
+        self.assertEqual(
+            repo_mapping.lookup("deps+specific_repo", "external_dep"),
+            "external_dep~exact",
+        )
+
+        # Test prefix-based lookups
+        self.assertEqual(
+            repo_mapping.lookup("deps+some_repo", "external_dep"), "external_dep~prefix"
+        )
+        self.assertEqual(
+            repo_mapping.lookup("test_deps+another_repo", "test_lib"), "test_lib~2.1.0"
+        )
+
+        # Test that exact takes precedence over prefix
+        self.assertEqual(
+            repo_mapping.lookup("deps+specific_repo", "external_dep"),
+            "external_dep~exact",
+        )
+
+        # Test non-existent mapping
+        self.assertIsNone(repo_mapping.lookup("nonexistent", "repo"))
+        self.assertIsNone(repo_mapping.lookup("unknown+repo", "missing"))
+
+        # Test empty mapping
+        empty_mapping = _RepositoryMapping({}, {})
+        self.assertIsNone(empty_mapping.lookup("any", "repo"))
+
+        # Test is_empty() method
+        self.assertFalse(repo_mapping.is_empty())  # Should have mappings
+        self.assertTrue(empty_mapping.is_empty())  # Should be empty
+
     def testCurrentRepository(self) -> None:
         # Under bzlmod, the current repository name is the empty string instead
         # of the name in the workspace file.