fix: Avoid creating URLs with empty path segments from index URLs in environment variables (#2557)

This change updates `_read_simpleapi` such that it correctly handles the
case where the index URL is specified in an environment variable and
contains a trailing slash. The URL construction would have introduced an
empty path segment, which is now removed.

Fixes: #2554

---------

Co-authored-by: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3c71f7e..3ea9339 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -57,6 +57,8 @@
 {#v0-0-0-fixed}
 ### Fixed
 * (gazelle) Providing multiple input requirements files to `gazelle_python_manifest` now works correctly.
+* (pypi) Handle trailing slashes in pip index URLs in environment variables,
+  fixes [#2554](https://github.com/bazelbuild/rules_python/issues/2554).
 
 {#v0-0-0-added}
 ### Added
diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl
index 6401a06..ef39fb8 100644
--- a/python/private/pypi/simpleapi_download.bzl
+++ b/python/private/pypi/simpleapi_download.bzl
@@ -17,7 +17,7 @@
 """
 
 load("@bazel_features//:features.bzl", "bazel_features")
-load("//python/private:auth.bzl", "get_auth")
+load("//python/private:auth.bzl", _get_auth = "get_auth")
 load("//python/private:envsubst.bzl", "envsubst")
 load("//python/private:normalize_name.bzl", "normalize_name")
 load("//python/private:text_util.bzl", "render")
@@ -30,6 +30,7 @@
         cache,
         parallel_download = True,
         read_simpleapi = None,
+        get_auth = None,
         _fail = fail):
     """Download Simple API HTML.
 
@@ -59,6 +60,7 @@
         parallel_download: A boolean to enable usage of bazel 7.1 non-blocking downloads.
         read_simpleapi: a function for reading and parsing of the SimpleAPI contents.
             Used in tests.
+        get_auth: A function to get auth information passed to read_simpleapi. Used in tests.
         _fail: a function to print a failure. Used in tests.
 
     Returns:
@@ -98,6 +100,7 @@
                 ),
                 attr = attr,
                 cache = cache,
+                get_auth = get_auth,
                 **download_kwargs
             )
             if hasattr(result, "wait"):
@@ -144,7 +147,7 @@
 
     return contents
 
-def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
+def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs):
     """Read SimpleAPI.
 
     Args:
@@ -157,6 +160,7 @@
            * auth_patterns: The auth_patterns parameter for ctx.download, see
                http_file for docs.
         cache: A dict for storing the results.
+        get_auth: A function to get auth information. Used in tests.
         **download_kwargs: Any extra params to ctx.download.
             Note that output and auth will be passed for you.
 
@@ -169,11 +173,11 @@
     # them to ctx.download if we want to correctly handle the relative URLs.
     # TODO: Add a test that env subbed index urls do not leak into the lock file.
 
-    real_url = envsubst(
+    real_url = strip_empty_path_segments(envsubst(
         url,
         attr.envsubst,
         ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get,
-    )
+    ))
 
     cache_key = real_url
     if cache_key in cache:
@@ -194,6 +198,8 @@
 
     output = ctx.path(output_str.strip("_").lower() + ".html")
 
+    get_auth = get_auth or _get_auth
+
     # NOTE: this may have block = True or block = False in the download_kwargs
     download = ctx.download(
         url = [real_url],
@@ -211,6 +217,27 @@
 
     return _read_index_result(ctx, download, output, real_url, cache, cache_key)
 
+def strip_empty_path_segments(url):
+    """Removes empty path segments from a URL. Does nothing for urls with no scheme.
+
+    Public only for testing.
+
+    Args:
+        url: The url to remove empty path segments from
+
+    Returns:
+        The url with empty path segments removed and any trailing slash preserved.
+        If the url had no scheme it is returned unchanged.
+    """
+    scheme, _, rest = url.partition("://")
+    if rest == "":
+        return url
+    stripped = "/".join([p for p in rest.split("/") if p])
+    if url.endswith("/"):
+        return "{}://{}/".format(scheme, stripped)
+    else:
+        return "{}://{}".format(scheme, stripped)
+
 def _read_index_result(ctx, result, output, url, cache, cache_key):
     if not result.success:
         return struct(success = False)
diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
index 9b2967b..964d3e2 100644
--- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
+++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl
@@ -15,17 +15,18 @@
 ""
 
 load("@rules_testing//lib:test_suite.bzl", "test_suite")
-load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download")  # buildifier: disable=bzl-visibility
+load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download", "strip_empty_path_segments")  # buildifier: disable=bzl-visibility
 
 _tests = []
 
 def _test_simple(env):
     calls = []
 
-    def read_simpleapi(ctx, url, attr, cache, block):
+    def read_simpleapi(ctx, url, attr, cache, get_auth, block):
         _ = ctx  # buildifier: disable=unused-variable
         _ = attr
         _ = cache
+        _ = get_auth
         env.expect.that_bool(block).equals(False)
         calls.append(url)
         if "foo" in url and "main" in url:
@@ -73,10 +74,11 @@
     calls = []
     fails = []
 
-    def read_simpleapi(ctx, url, attr, cache, block):
+    def read_simpleapi(ctx, url, attr, cache, get_auth, block):
         _ = ctx  # buildifier: disable=unused-variable
         _ = attr
         _ = cache
+        _ = get_auth
         env.expect.that_bool(block).equals(False)
         calls.append(url)
         if "foo" in url:
@@ -119,6 +121,121 @@
 
 _tests.append(_test_fail)
 
+def _test_download_url(env):
+    downloads = {}
+
+    def download(url, output, **kwargs):
+        _ = kwargs  # buildifier: disable=unused-variable
+        downloads[url[0]] = output
+        return struct(success = True)
+
+    simpleapi_download(
+        ctx = struct(
+            os = struct(environ = {}),
+            download = download,
+            read = lambda i: "contents of " + i,
+            path = lambda i: "path/for/" + i,
+        ),
+        attr = struct(
+            index_url_overrides = {},
+            index_url = "https://example.com/main/simple/",
+            extra_index_urls = [],
+            sources = ["foo", "bar", "baz"],
+            envsubst = [],
+        ),
+        cache = {},
+        parallel_download = False,
+        get_auth = lambda ctx, urls, ctx_attr: struct(),
+    )
+
+    env.expect.that_dict(downloads).contains_exactly({
+        "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
+        "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
+        "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
+    })
+
+_tests.append(_test_download_url)
+
+def _test_download_url_parallel(env):
+    downloads = {}
+
+    def download(url, output, **kwargs):
+        _ = kwargs  # buildifier: disable=unused-variable
+        downloads[url[0]] = output
+        return struct(wait = lambda: struct(success = True))
+
+    simpleapi_download(
+        ctx = struct(
+            os = struct(environ = {}),
+            download = download,
+            read = lambda i: "contents of " + i,
+            path = lambda i: "path/for/" + i,
+        ),
+        attr = struct(
+            index_url_overrides = {},
+            index_url = "https://example.com/main/simple/",
+            extra_index_urls = [],
+            sources = ["foo", "bar", "baz"],
+            envsubst = [],
+        ),
+        cache = {},
+        parallel_download = True,
+        get_auth = lambda ctx, urls, ctx_attr: struct(),
+    )
+
+    env.expect.that_dict(downloads).contains_exactly({
+        "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
+        "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
+        "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
+    })
+
+_tests.append(_test_download_url_parallel)
+
+def _test_download_envsubst_url(env):
+    downloads = {}
+
+    def download(url, output, **kwargs):
+        _ = kwargs  # buildifier: disable=unused-variable
+        downloads[url[0]] = output
+        return struct(success = True)
+
+    simpleapi_download(
+        ctx = struct(
+            os = struct(environ = {"INDEX_URL": "https://example.com/main/simple/"}),
+            download = download,
+            read = lambda i: "contents of " + i,
+            path = lambda i: "path/for/" + i,
+        ),
+        attr = struct(
+            index_url_overrides = {},
+            index_url = "$INDEX_URL",
+            extra_index_urls = [],
+            sources = ["foo", "bar", "baz"],
+            envsubst = ["INDEX_URL"],
+        ),
+        cache = {},
+        parallel_download = False,
+        get_auth = lambda ctx, urls, ctx_attr: struct(),
+    )
+
+    env.expect.that_dict(downloads).contains_exactly({
+        "https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html",
+        "https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html",
+        "https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html",
+    })
+
+_tests.append(_test_download_envsubst_url)
+
+def _test_strip_empty_path_segments(env):
+    env.expect.that_str(strip_empty_path_segments("no/scheme//is/unchanged")).equals("no/scheme//is/unchanged")
+    env.expect.that_str(strip_empty_path_segments("scheme://with/no/empty/segments")).equals("scheme://with/no/empty/segments")
+    env.expect.that_str(strip_empty_path_segments("scheme://with//empty/segments")).equals("scheme://with/empty/segments")
+    env.expect.that_str(strip_empty_path_segments("scheme://with///multiple//empty/segments")).equals("scheme://with/multiple/empty/segments")
+    env.expect.that_str(strip_empty_path_segments("scheme://with//trailing/slash/")).equals("scheme://with/trailing/slash/")
+    env.expect.that_str(strip_empty_path_segments("scheme://with/trailing/slashes///")).equals("scheme://with/trailing/slashes/")
+
+_tests.append(_test_strip_empty_path_segments)
+
 def simpleapi_download_test_suite(name):
     """Create the test suite.