feat(zipapp): support EXTRACT_ROOT env var for __main__.py invocations (#3682)

This makes zipapps that are invoked using the `__main__.py` entry point
(i.e. `python foo.zip`) respect the `RULES_PYTHON_EXTRACT_ROOT` env var,
which allows some control over where they extract themselves.
diff --git a/python/private/zipapp/py_zipapp_rule.bzl b/python/private/zipapp/py_zipapp_rule.bzl
index a6ab485..ac79447 100644
--- a/python/private/zipapp/py_zipapp_rule.bzl
+++ b/python/private/zipapp/py_zipapp_rule.bzl
@@ -35,6 +35,11 @@
         template = py_runtime.zip_main_template,
         output = zip_main_py,
         substitutions = {
+            "%EXTRACT_DIR%": paths.join(
+                (ctx.label.repo_name or "_main"),
+                ctx.label.package,
+                ctx.label.name,
+            ),
             "%python_binary%": venv_python_exe_path,
             "%python_binary_actual%": python_binary_actual_path,
             "%stage2_bootstrap%": runfiles_root_path(ctx, stage2_bootstrap.short_path),
diff --git a/python/private/zipapp/zip_main_template.py b/python/private/zipapp/zip_main_template.py
index e997110..3c25d1d 100644
--- a/python/private/zipapp/zip_main_template.py
+++ b/python/private/zipapp/zip_main_template.py
@@ -23,9 +23,11 @@
 
 import os
 import shutil
+import stat
 import subprocess
 import tempfile
 import zipfile
+from os.path import dirname, join
 
 # runfiles-root-relative path
 _STAGE2_BOOTSTRAP = "%stage2_bootstrap%"
@@ -35,6 +37,10 @@
 # executable to use.
 _PYTHON_BINARY_ACTUAL = "%python_binary_actual%"
 _WORKSPACE_NAME = "%workspace_name%"
+# relative path under EXTRACT_ROOT to extract to.
+EXTRACT_DIR = "%EXTRACT_DIR%"
+
+EXTRACT_ROOT = os.environ.get("RULES_PYTHON_EXTRACT_ROOT")
 
 
 def print_verbose(*args, mapping=None, values=None):
@@ -118,7 +124,7 @@
     search_path = os.getenv("PATH", os.defpath).split(os.pathsep)
     for directory in search_path:
         if directory:
-            path = os.path.join(directory, name)
+            path = join(directory, name)
             if os.path.isfile(path) and os.access(path, os.X_OK):
                 return path
     return None
@@ -139,7 +145,7 @@
     # Use normpath() to convert slashes to os.sep on Windows.
     elif os.sep in os.path.normpath(bin_name):
         # Case 3: Path is relative to the repo root.
-        return os.path.join(runfiles_root, bin_name)
+        return join(runfiles_root, bin_name)
     else:
         # Case 4: Path has to be looked up in the search path.
         return search_path(bin_name)
@@ -161,10 +167,18 @@
     dest_dir = get_windows_path_with_unc_prefix(dest_dir)
     with zipfile.ZipFile(zip_path) as zf:
         for info in zf.infolist():
+            file_path = os.path.abspath(join(dest_dir, info.filename))
+            # If the file exists, it might be a symlink or read-only file from a previous extraction.
+            # Unlink it first so zipfile.extract doesn't corrupt the symlink target or fail on read-only files.
+            if os.path.lexists(file_path) and not os.path.isdir(file_path):
+                try:
+                    os.unlink(file_path)
+                except OSError:
+                    # On Windows, unlinking a read-only file fails.
+                    os.chmod(file_path, stat.S_IWRITE)
+                    os.unlink(file_path)
+
             zf.extract(info, dest_dir)
-            # UNC-prefixed paths must be absolute/normalized. See
-            # https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file#maximum-path-length-limitation
-            file_path = os.path.abspath(os.path.join(dest_dir, info.filename))
             # The Unix st_mode bits (see "man 7 inode") are stored in the upper 16
             # bits of external_attr.
             attrs = info.external_attr >> 16
@@ -182,11 +196,14 @@
 
 # Create the runfiles tree by extracting the zip file
 def create_runfiles_root():
-    temp_dir = tempfile.mkdtemp("", "Bazel.runfiles_")
-    extract_zip(os.path.dirname(__file__), temp_dir)
+    if EXTRACT_ROOT:
+        extract_root = join(EXTRACT_ROOT, EXTRACT_DIR)
+    else:
+        extract_root = tempfile.mkdtemp("", "Bazel.runfiles_")
+    extract_zip(dirname(__file__), extract_root)
     # IMPORTANT: Later code does `rm -fr` on dirname(runfiles_root) -- it's
     # important that deletion code be in sync with this directory structure
-    return os.path.join(temp_dir, "runfiles")
+    return join(extract_root, "runfiles")
 
 
 def execute_file(
@@ -223,18 +240,24 @@
     # - When running in a zip file, we need to clean up the
     #   workspace after the process finishes so control must return here.
     try:
-        subprocess_argv = [python_program, main_filename] + args
+        subprocess_argv = [python_program]
+        if not EXTRACT_ROOT:
+            subprocess_argv.append(f"-XRULES_PYTHON_ZIP_DIR={dirname(runfiles_root)}")
+        subprocess_argv.append(main_filename)
+        subprocess_argv += args
         print_verbose("subprocess argv:", values=subprocess_argv)
         print_verbose("subprocess env:", mapping=env)
         print_verbose("subprocess cwd:", workspace)
         ret_code = subprocess.call(subprocess_argv, env=env, cwd=workspace)
         sys.exit(ret_code)
     finally:
-        # NOTE: dirname() is called because create_runfiles_root() creates a
-        # sub-directory within a temporary directory, and we want to remove the
-        # whole temporary directory.
-        ##shutil.rmtree(os.path.dirname(runfiles_root), True)
-        pass
+        if not EXTRACT_ROOT:
+            # NOTE: dirname() is called because create_runfiles_root() creates a
+            # sub-directory within a temporary directory, and we want to remove the
+            # whole temporary directory.
+            extract_root = dirname(runfiles_root)
+            print_verbose("cleanup: rmtree: ", extract_root)
+            shutil.rmtree(extract_root, True)
 
 
 def main():
@@ -266,7 +289,7 @@
     # See: https://docs.python.org/3.11/using/cmdline.html#envvar-PYTHONSAFEPATH
     new_env["PYTHONSAFEPATH"] = "1"
 
-    main_filename = os.path.join(runfiles_root, main_rel_path)
+    main_filename = join(runfiles_root, main_rel_path)
     main_filename = get_windows_path_with_unc_prefix(main_filename)
     assert os.path.exists(main_filename), (
         "Cannot exec() %r: file not found." % main_filename
@@ -276,7 +299,7 @@
     )
 
     if _PYTHON_BINARY_VENV:
-        python_program = os.path.join(runfiles_root, _PYTHON_BINARY_VENV)
+        python_program = join(runfiles_root, _PYTHON_BINARY_VENV)
         # When a venv is used, the `bin/python3` symlink may need to be created.
         # This case occurs when "create venv at runtime" or "resolve python at
         # runtime" modes are enabled.
@@ -288,7 +311,7 @@
                     "Program's venv binary not under runfiles: {python_program}"
                 )
             symlink_to = find_binary(runfiles_root, _PYTHON_BINARY_ACTUAL)
-            os.makedirs(os.path.dirname(python_program), exist_ok=True)
+            os.makedirs(dirname(python_program), exist_ok=True)
             try:
                 os.symlink(symlink_to, python_program)
             except OSError as e:
@@ -317,7 +340,7 @@
     # change directory to the right runfiles directory.
     # (So that the data files are accessible)
     if os.environ.get("RUN_UNDER_RUNFILES") == "1":
-        workspace = os.path.join(runfiles_root, _WORKSPACE_NAME)
+        workspace = join(runfiles_root, _WORKSPACE_NAME)
 
     sys.stdout.flush()
     execute_file(
diff --git a/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh b/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh
index 21c6741..bb4ba64 100755
--- a/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh
+++ b/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh
@@ -13,6 +13,21 @@
 ZIPAPP="${ZIPAPP/.exe/.zip}"
 
 export RULES_PYTHON_BOOTSTRAP_VERBOSE=1
+
 # We're testing the invocation of `__main__.py`, so we have to
 # manually pass the zipapp to python.
+echo "Running zipapp using an automatic temp directory..."
+"$PYTHON" "$ZIPAPP"
+
+echo "Running zipapp with extract root set..."
+export RULES_PYTHON_EXTRACT_ROOT="${TEST_TMPDIR:-/tmp}/extract_root_test"
+"$PYTHON" "$ZIPAPP"
+
+# Verify that the directory was created
+if [[ ! -d "$RULES_PYTHON_EXTRACT_ROOT" ]]; then
+  echo "Error: Extract root directory $RULES_PYTHON_EXTRACT_ROOT was not created!"
+  exit 1
+fi
+
+echo "Running zipapp with extract root set a second time..."
 "$PYTHON" "$ZIPAPP"