fix: Avoid creating URLs with empty path segments from index URLs in environment variables (#2557)
This change updates `_read_simpleapi` such that it correctly handles the
case where the index URL is specified in an environment variable and
contains a trailing slash. The URL construction would have introduced an
empty path segment, which is now removed.
Fixes: #2554
---------
Co-authored-by: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3c71f7e..3ea9339 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -57,6 +57,8 @@
{#v0-0-0-fixed}
### Fixed
* (gazelle) Providing multiple input requirements files to `gazelle_python_manifest` now works correctly.
+* (pypi) Handle trailing slashes in pip index URLs in environment variables,
+ fixes [#2554](https://github.com/bazelbuild/rules_python/issues/2554).
{#v0-0-0-added}
### Added
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 6401a06..ef39fb8 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -17,7 +17,7 @@
"""
load("@bazel_features//:features.bzl", "bazel_features")
-load("//python/private:auth.bzl", "get_auth")
+load("//python/private:auth.bzl", _get_auth = "get_auth")
load("//python/private:envsubst.bzl", "envsubst")
load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:text_util.bzl", "render")
@@ -30,6 +30,7 @@
cache,
parallel_download = True,
read_simpleapi = None,
+ get_auth = None,
_fail = fail):
"""Download Simple API HTML.
@@ -59,6 +60,7 @@
parallel_download: A boolean to enable usage of bazel 7.1 non-blocking downloads.
read_simpleapi: a function for reading and parsing of the SimpleAPI contents.
Used in tests.
+ get_auth: A function to get auth information passed to read_simpleapi. Used in tests.
_fail: a function to print a failure. Used in tests.
Returns:
@@ -98,6 +100,7 @@
),
attr = attr,
cache = cache,
+ get_auth = get_auth,
**download_kwargs
)
if hasattr(result, "wait"):
@@ -144,7 +147,7 @@
return contents
-def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
+def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
"""Read SimpleAPI.
Args:
@@ -157,6 +160,7 @@
* auth_patterns: The auth_patterns parameter for ctx.download, see
http_file for docs.
cache: A dict for storing the results.
+ get_auth: A function to get auth information. Used in tests.
**download_kwargs: Any extra params to ctx.download.
Note that output and auth will be passed for you.
@@ -169,11 +173,11 @@
# them to ctx.download if we want to correctly handle the relative URLs.
# TODO: Add a test that env subbed index urls do not leak into the lock file.
- real_url = envsubst(
+ real_url = strip_empty_path_segments(envsubst(
url,
attr.envsubst,
ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get,
- )
+ ))
cache_key = real_url
if cache_key in cache:
@@ -194,6 +198,8 @@
output = ctx.path(output_str.strip("_").lower() + ".html")
+ get_auth = get_auth or _get_auth
+
# NOTE: this may have block = True or block = False in the download_kwargs
download = ctx.download(
url = [real_url],
@@ -211,6 +217,27 @@
return _read_index_result(ctx, download, output, real_url, cache, cache_key)
+def strip_empty_path_segments(url):
+ """Removes empty path segments from a URL. Does nothing for urls with no scheme.
+
+ Public only for testing.
+
+ Args:
+ url: The url to remove empty path segments from
+
+ Returns:
+ The url with empty path segments removed and any trailing slash preserved.
+ If the url had no scheme it is returned unchanged.
+ """
+ scheme, _, rest = url.partition("://")
+ if rest == "":
+ return url
+ stripped = "/".join([p for p in rest.split("/") if p])
+ if url.endswith("/"):
+ return "{}://{}/".format(scheme, stripped)
+ else:
+ return "{}://{}".format(scheme, stripped)
+
def _read_index_result(ctx, result, output, url, cache, cache_key):
if not result.success:
return struct(success = False)
diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
index 9b2967b..964d3e2 100644
--- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
+++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
@@ -15,17 +15,18 @@
""
load("@rules_testing//lib:test_suite.bzl", "test_suite")
-load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download") # buildifier: disable=bzl-visibility
+load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download", "strip_empty_path_segments") # buildifier: disable=bzl-visibility
_tests = []
def _test_simple(env):
calls = []
- def read_simpleapi(ctx, url, attr, cache, block):
+ def read_simpleapi(ctx, url, attr, cache, get_auth, block):
_ = ctx # buildifier: disable=unused-variable
_ = attr
_ = cache
+ _ = get_auth
env.expect.that_bool(block).equals(False)
calls.append(url)
if "foo" in url and "main" in url:
@@ -73,10 +74,11 @@
calls = []
fails = []
- def read_simpleapi(ctx, url, attr, cache, block):
+ def read_simpleapi(ctx, url, attr, cache, get_auth, block):
_ = ctx # buildifier: disable=unused-variable
_ = attr
_ = cache
+ _ = get_auth
env.expect.that_bool(block).equals(False)
calls.append(url)
if "foo" in url:
@@ -119,6 +121,121 @@
_tests.append(_test_fail)
+def _test_download_url(env):
+ downloads = {}
+
+ def download(url, output, **kwargs):
+ _ = kwargs # buildifier: disable=unused-variable
+ downloads[url[0]] = output
+ return struct(success = True)
+
+ simpleapi_download(
+ ctx = struct(
+ os = struct(environ = {}),
+ download = download,
+ read = lambda i: "contents of " + i,
+ path = lambda i: "path/for/" + i,
+ ),
+ attr = struct(
+ index_url_overrides = {},
+ index_url = "https://example.com/main/simple/",
+ extra_index_urls = [],
+ sources = ["foo", "bar", "baz"],
+ envsubst = [],
+ ),
+ cache = {},
+ parallel_download = False,
+ get_auth = lambda ctx, urls, ctx_attr: struct(),
+ )
+
+ env.expect.that_dict(downloads).contains_exactly({
+ "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
+ "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
+ "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
+ })
+
+_tests.append(_test_download_url)
+
+def _test_download_url_parallel(env):
+ downloads = {}
+
+ def download(url, output, **kwargs):
+ _ = kwargs # buildifier: disable=unused-variable
+ downloads[url[0]] = output
+ return struct(wait = lambda: struct(success = True))
+
+ simpleapi_download(
+ ctx = struct(
+ os = struct(environ = {}),
+ download = download,
+ read = lambda i: "contents of " + i,
+ path = lambda i: "path/for/" + i,
+ ),
+ attr = struct(
+ index_url_overrides = {},
+ index_url = "https://example.com/main/simple/",
+ extra_index_urls = [],
+ sources = ["foo", "bar", "baz"],
+ envsubst = [],
+ ),
+ cache = {},
+ parallel_download = True,
+ get_auth = lambda ctx, urls, ctx_attr: struct(),
+ )
+
+ env.expect.that_dict(downloads).contains_exactly({
+ "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
+ "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
+ "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
+ })
+
+_tests.append(_test_download_url_parallel)
+
+def _test_download_envsubst_url(env):
+ downloads = {}
+
+ def download(url, output, **kwargs):
+ _ = kwargs # buildifier: disable=unused-variable
+ downloads[url[0]] = output
+ return struct(success = True)
+
+ simpleapi_download(
+ ctx = struct(
+ os = struct(environ = {"INDEX_URL": "https://example.com/main/simple/"}),
+ download = download,
+ read = lambda i: "contents of " + i,
+ path = lambda i: "path/for/" + i,
+ ),
+ attr = struct(
+ index_url_overrides = {},
+ index_url = "$INDEX_URL",
+ extra_index_urls = [],
+ sources = ["foo", "bar", "baz"],
+ envsubst = ["INDEX_URL"],
+ ),
+ cache = {},
+ parallel_download = False,
+ get_auth = lambda ctx, urls, ctx_attr: struct(),
+ )
+
+ env.expect.that_dict(downloads).contains_exactly({
+ "https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html",
+ "https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html",
+ "https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html",
+ })
+
+_tests.append(_test_download_envsubst_url)
+
+def _test_strip_empty_path_segments(env):
+ env.expect.that_str(strip_empty_path_segments("no/scheme//is/unchanged")).equals("no/scheme//is/unchanged")
+ env.expect.that_str(strip_empty_path_segments("scheme://with/no/empty/segments")).equals("scheme://with/no/empty/segments")
+ env.expect.that_str(strip_empty_path_segments("scheme://with//empty/segments")).equals("scheme://with/empty/segments")
+ env.expect.that_str(strip_empty_path_segments("scheme://with///multiple//empty/segments")).equals("scheme://with/multiple/empty/segments")
+ env.expect.that_str(strip_empty_path_segments("scheme://with//trailing/slash/")).equals("scheme://with/trailing/slash/")
+ env.expect.that_str(strip_empty_path_segments("scheme://with/trailing/slashes///")).equals("scheme://with/trailing/slashes/")
+
+_tests.append(_test_strip_empty_path_segments)
+
def simpleapi_download_test_suite(name):
"""Create the test suite.