| # Copyright 2024 The Bazel Authors. All rights reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """ |
| A file that houses private functions used in the `bzlmod` extension with the same name. |
| """ |
| |
| load("@bazel_features//:features.bzl", "bazel_features") |
| load("//python/private:auth.bzl", _get_auth = "get_auth") |
| load("//python/private:envsubst.bzl", "envsubst") |
| load("//python/private:normalize_name.bzl", "normalize_name") |
| load("//python/private:text_util.bzl", "render") |
| load(":parse_simpleapi_html.bzl", "parse_simpleapi_html") |
| |
def simpleapi_download(
        ctx,
        *,
        attr,
        cache,
        parallel_download = True,
        read_simpleapi = None,
        get_auth = None,
        _fail = fail):
    """Download Simple API HTML.

    Every package in `attr.sources` is looked up on the main index first and
    then, if still missing, on each extra index in order. The first index that
    yields a result wins; results from several indexes are never merged.

    Args:
        ctx: The module_ctx or repository_ctx.
        attr: Contains the parameters for the download. They are grouped into a
            struct for better clarity. It must have attributes:
             * index_url: str, the index.
             * index_url_overrides: dict[str, str], the index overrides for
               separate packages. May be None.
             * extra_index_urls: Extra index URLs that will be looked up after
               the main is looked up. May be None.
             * sources: list[str], the sources to download things for. Each value is
               the contents of requirements files.
             * envsubst: list[str], the envsubst vars for performing substitution in index url.
             * netrc: The netrc parameter for ctx.download, see http_file for docs.
             * auth_patterns: The auth_patterns parameter for ctx.download, see
               http_file for docs.
        cache: A dictionary that can be used as a cache between calls during a
            single evaluation of the extension. We use a dictionary as a cache
            so that we can reuse calls to the simple API when evaluating the
            extension. Using the canonical_id parameter of the module_ctx would
            deposit the simple API responses to the bazel cache and that is
            undesirable because additions to the PyPI index would not be
            reflected when re-evaluating the extension unless we do
            `bazel clean --expunge`.
        parallel_download: A boolean to enable usage of bazel 7.1 non-blocking downloads.
        read_simpleapi: a function for reading and parsing of the SimpleAPI contents.
            Used in tests.
        get_auth: A function to get auth information passed to read_simpleapi. Used in tests.
        _fail: a function to print a failure. Used in tests.

    Returns:
        dict of normalized pkg name to the parsed Simple API contents as
        produced by `read_simpleapi`. None is returned when some package could
        not be found on any index and `_fail` did not abort the evaluation.
    """

    # Overrides are matched against the normalized package name.
    index_url_overrides = {
        normalize_name(p): i
        for p, i in (attr.index_url_overrides or {}).items()
    }

    download_kwargs = {}
    if bazel_features.external_deps.download_has_block_param:
        download_kwargs["block"] = not parallel_download

    # NOTE @aignas 2024-03-31: we are not merging results from multiple indexes
    # to replicate how `pip` would handle this case.
    contents = {}
    index_urls = [attr.index_url] + (attr.extra_index_urls or [])
    read_simpleapi = read_simpleapi or _read_simpleapi

    # Maps the package (as spelled in attr.sources) to the index it was found on.
    found_on_index = {}
    for index_url in index_urls:
        sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
        if not sources:
            # Everything has been resolved, no need to consult further indexes.
            break

        async_downloads = {}
        for pkg in sources:
            pkg_normalized = normalize_name(pkg)
            result = read_simpleapi(
                ctx = ctx,
                url = "{}/{}/".format(
                    index_url_overrides.get(pkg_normalized, index_url).rstrip("/"),
                    pkg,
                ),
                attr = attr,
                cache = cache,
                get_auth = get_auth,
                **download_kwargs
            )
            if hasattr(result, "wait"):
                # We will process it in a separate loop:
                async_downloads[pkg] = struct(
                    pkg_normalized = pkg_normalized,
                    wait = result.wait,
                )
            elif result.success:
                contents[pkg_normalized] = result.output
                found_on_index[pkg] = index_url

        # If we use `block` == False, then we need to have a second loop that is
        # collecting all of the results as they were being downloaded in parallel.
        for pkg, download in async_downloads.items():
            result = download.wait()

            if result.success:
                contents[download.pkg_normalized] = result.output
                found_on_index[pkg] = index_url

    failed_sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
    if failed_sources:
        _fail("Failed to download metadata for {} for from urls: {}".format(
            failed_sources,
            index_urls,
        ))

        # Only reachable when `_fail` is replaced by a non-aborting function.
        return None

    # Only packages that had to fall through to an extra index are worth
    # pinning; when there are none, stay quiet instead of printing an empty
    # suggestion about warnings that never happened.
    suggested_overrides = {
        pkg: found_on_index[pkg]
        for pkg in attr.sources
        if found_on_index[pkg] != attr.index_url
    }
    if suggested_overrides:
        # buildifier: disable=print
        print("You can use the following `index_url_overrides` to avoid the 404 warnings:\n{}".format(
            render.dict(suggested_overrides),
        ))

    return contents
| |
def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
    """Fetch a single Simple API page and parse it, consulting `cache` first.

    Args:
        ctx: The module_ctx or repository_ctx.
        url: str, the url parameter that can be passed to ctx.download. It may
            still contain envsubst placeholders.
        attr: The attribute that contains necessary info for downloading. The
            following attributes must be present:
             * envsubst: The envsubst values for performing substitutions in the URL.
             * netrc: The netrc parameter for ctx.download, see http_file for docs.
             * auth_patterns: The auth_patterns parameter for ctx.download, see
               http_file for docs.
        cache: A dict for storing the results, keyed by the substituted URL.
        get_auth: A function to get auth information. Used in tests.
        **download_kwargs: Any extra params to ctx.download.
            Note that output and auth will be passed for you.

    Returns:
        A similar object to what `download` would return except that
        `result.output` holds the parsed simple api contents. When `block` is
        False in `download_kwargs`, a struct with a `wait` function returning
        such an object is returned instead.
    """
    # NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for
    # the whl location and we cannot handle multiple URLs at once by passing
    # them to ctx.download if we want to correctly handle the relative URLs.
    # TODO: Add a test that env subbed index urls do not leak into the lock file.

    if hasattr(ctx, "getenv"):
        getenv = ctx.getenv
    else:
        getenv = ctx.os.environ.get

    # The fully substituted URL is both what gets downloaded and the cache key.
    real_url = strip_empty_path_segments(envsubst(url, attr.envsubst, getenv))
    if real_url in cache:
        return struct(success = True, output = cache[real_url])

    # Use env names in the subst values - this will be unique over
    # the lifetime of the execution of this function and we also use
    # `~` as the separator to ensure that we don't get clashes.
    placeholders = {name: "~{}~".format(name) for name in attr.envsubst}
    masked_url = envsubst(url, attr.envsubst, placeholders.get)

    # Transform the URL into a valid filename
    filename = (
        masked_url
            .replace(".", "_")
            .replace(":", "_")
            .replace("/", "_")
            .replace("\\", "_")
            .replace("-", "_")
    )
    output = ctx.path(filename.strip("_").lower() + ".html")

    auth_fn = get_auth or _get_auth

    # NOTE: this may have block = True or block = False in the download_kwargs
    pending = ctx.download(
        url = [real_url],
        output = output,
        auth = auth_fn(ctx, [real_url], ctx_attr = attr),
        allow_fail = True,
        **download_kwargs
    )

    is_non_blocking = download_kwargs.get("block") == False
    if not is_non_blocking:
        return _read_index_result(ctx, pending, output, real_url, cache, real_url)

    # Simulate the same API as ctx.download has
    return struct(
        wait = lambda: _read_index_result(ctx, pending.wait(), output, real_url, cache, real_url),
    )
| |
def strip_empty_path_segments(url):
    """Removes empty path segments from a URL. Does nothing for urls with no scheme.

    Only the part of the URL before the first `?` or `#` is normalized, so a
    query string or fragment that itself contains `//` (for example an
    embedded URL) is passed through untouched.

    Public only for testing.

    Args:
        url: The url to remove empty path segments from

    Returns:
        The url with empty path segments removed and any trailing slash of the
        path preserved. If the url had no scheme it is returned unchanged.
    """
    scheme, _, rest = url.partition("://")
    if rest == "":
        return url

    # Peel off the query and/or fragment, whichever comes first, so that the
    # `/`-splitting below cannot touch them.
    path = rest
    suffix = ""
    for marker in ["?", "#"]:
        head, found, tail = path.partition(marker)
        if found:
            path = head
            suffix = found + tail + suffix

    stripped = "/".join([segment for segment in path.split("/") if segment])
    if path.endswith("/"):
        stripped += "/"
    return "{}://{}{}".format(scheme, stripped, suffix)
| |
def _read_index_result(ctx, result, output, url, cache, cache_key):
    """Turn a finished download into a parsed Simple API result.

    Args:
        ctx: The module_ctx or repository_ctx, used to read the downloaded file.
        result: The download result; only its `success` field is inspected.
        output: The path the index page was downloaded to.
        url: The URL the page was fetched from, forwarded to the HTML parser.
        cache: A dict that receives the parsed contents on success.
        cache_key: The key under which the parsed contents are stored in `cache`.

    Returns:
        struct(success = True, output = <parsed contents>, cache_key = cache_key)
        when the download succeeded and parsing produced a truthy value,
        struct(success = False) otherwise.
    """
    if not result.success:
        return struct(success = False)

    parsed = parse_simpleapi_html(url = url, content = ctx.read(output))
    if not parsed:
        # A page that parses to nothing is treated the same as a failed download.
        return struct(success = False)

    # Keep an already cached value if one exists for this key.
    cache.setdefault(cache_key, parsed)
    return struct(success = True, output = parsed, cache_key = cache_key)