feat(zipapp): support EXTRACT_ROOT env var for __main__.py invocations (#3682)
This makes zipapps that are invoked using the `__main__.py` entry point
(i.e. `python foo.zip`) respect the `RULES_PYTHON_EXTRACT_ROOT` env var,
which allows some control over where they extract themselves.
diff --git a/python/private/zipapp/py_zipapp_rule.bzl b/python/private/zipapp/py_zipapp_rule.bzl
index a6ab485..ac79447 100644
--- a/python/private/zipapp/py_zipapp_rule.bzl
+++ b/python/private/zipapp/py_zipapp_rule.bzl
@@ -35,6 +35,11 @@
template = py_runtime.zip_main_template,
output = zip_main_py,
substitutions = {
+ "%EXTRACT_DIR%": paths.join(
+ (ctx.label.repo_name or "_main"),
+ ctx.label.package,
+ ctx.label.name,
+ ),
"%python_binary%": venv_python_exe_path,
"%python_binary_actual%": python_binary_actual_path,
"%stage2_bootstrap%": runfiles_root_path(ctx, stage2_bootstrap.short_path),
diff --git a/python/private/zipapp/zip_main_template.py b/python/private/zipapp/zip_main_template.py
index e997110..3c25d1d 100644
--- a/python/private/zipapp/zip_main_template.py
+++ b/python/private/zipapp/zip_main_template.py
@@ -23,9 +23,11 @@
import os
import shutil
+import stat
import subprocess
import tempfile
import zipfile
+from os.path import dirname, join
# runfiles-root-relative path
_STAGE2_BOOTSTRAP = "%stage2_bootstrap%"
@@ -35,6 +37,10 @@
# executable to use.
_PYTHON_BINARY_ACTUAL = "%python_binary_actual%"
_WORKSPACE_NAME = "%workspace_name%"
+# relative path under EXTRACT_ROOT to extract to.
+EXTRACT_DIR = "%EXTRACT_DIR%"
+
+EXTRACT_ROOT = os.environ.get("RULES_PYTHON_EXTRACT_ROOT")
def print_verbose(*args, mapping=None, values=None):
@@ -118,7 +124,7 @@
search_path = os.getenv("PATH", os.defpath).split(os.pathsep)
for directory in search_path:
if directory:
- path = os.path.join(directory, name)
+ path = join(directory, name)
if os.path.isfile(path) and os.access(path, os.X_OK):
return path
return None
@@ -139,7 +145,7 @@
# Use normpath() to convert slashes to os.sep on Windows.
elif os.sep in os.path.normpath(bin_name):
# Case 3: Path is relative to the repo root.
- return os.path.join(runfiles_root, bin_name)
+ return join(runfiles_root, bin_name)
else:
# Case 4: Path has to be looked up in the search path.
return search_path(bin_name)
@@ -161,10 +167,18 @@
dest_dir = get_windows_path_with_unc_prefix(dest_dir)
with zipfile.ZipFile(zip_path) as zf:
for info in zf.infolist():
+ file_path = os.path.abspath(join(dest_dir, info.filename))
+ # If the file exists, it might be a symlink or read-only file from a previous extraction.
+ # Unlink it first so zipfile.extract doesn't corrupt the symlink target or fail on read-only files.
+ if os.path.lexists(file_path) and not os.path.isdir(file_path):
+ try:
+ os.unlink(file_path)
+ except OSError:
+ # On Windows, unlinking a read-only file fails.
+ os.chmod(file_path, stat.S_IWRITE)
+ os.unlink(file_path)
+
zf.extract(info, dest_dir)
- # UNC-prefixed paths must be absolute/normalized. See
- # https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file#maximum-path-length-limitation
- file_path = os.path.abspath(os.path.join(dest_dir, info.filename))
# The Unix st_mode bits (see "man 7 inode") are stored in the upper 16
# bits of external_attr.
attrs = info.external_attr >> 16
@@ -182,11 +196,14 @@
# Create the runfiles tree by extracting the zip file
def create_runfiles_root():
- temp_dir = tempfile.mkdtemp("", "Bazel.runfiles_")
- extract_zip(os.path.dirname(__file__), temp_dir)
+ if EXTRACT_ROOT:
+ extract_root = join(EXTRACT_ROOT, EXTRACT_DIR)
+ else:
+ extract_root = tempfile.mkdtemp("", "Bazel.runfiles_")
+ extract_zip(dirname(__file__), extract_root)
# IMPORTANT: Later code does `rm -fr` on dirname(runfiles_root) -- it's
# important that deletion code be in sync with this directory structure
- return os.path.join(temp_dir, "runfiles")
+ return join(extract_root, "runfiles")
def execute_file(
@@ -223,18 +240,24 @@
# - When running in a zip file, we need to clean up the
# workspace after the process finishes so control must return here.
try:
- subprocess_argv = [python_program, main_filename] + args
+ subprocess_argv = [python_program]
+ if not EXTRACT_ROOT:
+ subprocess_argv.append(f"-XRULES_PYTHON_ZIP_DIR={dirname(runfiles_root)}")
+ subprocess_argv.append(main_filename)
+ subprocess_argv += args
print_verbose("subprocess argv:", values=subprocess_argv)
print_verbose("subprocess env:", mapping=env)
print_verbose("subprocess cwd:", workspace)
ret_code = subprocess.call(subprocess_argv, env=env, cwd=workspace)
sys.exit(ret_code)
finally:
- # NOTE: dirname() is called because create_runfiles_root() creates a
- # sub-directory within a temporary directory, and we want to remove the
- # whole temporary directory.
- ##shutil.rmtree(os.path.dirname(runfiles_root), True)
- pass
+ if not EXTRACT_ROOT:
+ # NOTE: dirname() is called because create_runfiles_root() creates a
+ # sub-directory within a temporary directory, and we want to remove the
+ # whole temporary directory.
+ extract_root = dirname(runfiles_root)
+ print_verbose("cleanup: rmtree: ", extract_root)
+ shutil.rmtree(extract_root, True)
def main():
@@ -266,7 +289,7 @@
# See: https://docs.python.org/3.11/using/cmdline.html#envvar-PYTHONSAFEPATH
new_env["PYTHONSAFEPATH"] = "1"
- main_filename = os.path.join(runfiles_root, main_rel_path)
+ main_filename = join(runfiles_root, main_rel_path)
main_filename = get_windows_path_with_unc_prefix(main_filename)
assert os.path.exists(main_filename), (
"Cannot exec() %r: file not found." % main_filename
@@ -276,7 +299,7 @@
)
if _PYTHON_BINARY_VENV:
- python_program = os.path.join(runfiles_root, _PYTHON_BINARY_VENV)
+ python_program = join(runfiles_root, _PYTHON_BINARY_VENV)
# When a venv is used, the `bin/python3` symlink may need to be created.
# This case occurs when "create venv at runtime" or "resolve python at
# runtime" modes are enabled.
@@ -288,7 +311,7 @@
"Program's venv binary not under runfiles: {python_program}"
)
symlink_to = find_binary(runfiles_root, _PYTHON_BINARY_ACTUAL)
- os.makedirs(os.path.dirname(python_program), exist_ok=True)
+ os.makedirs(dirname(python_program), exist_ok=True)
try:
os.symlink(symlink_to, python_program)
except OSError as e:
@@ -317,7 +340,7 @@
# change directory to the right runfiles directory.
# (So that the data files are accessible)
if os.environ.get("RUN_UNDER_RUNFILES") == "1":
- workspace = os.path.join(runfiles_root, _WORKSPACE_NAME)
+ workspace = join(runfiles_root, _WORKSPACE_NAME)
sys.stdout.flush()
execute_file(
diff --git a/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh b/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh
index 21c6741..bb4ba64 100755
--- a/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh
+++ b/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh
@@ -13,6 +13,21 @@
ZIPAPP="${ZIPAPP/.exe/.zip}"
export RULES_PYTHON_BOOTSTRAP_VERBOSE=1
+
# We're testing the invocation of `__main__.py`, so we have to
# manually pass the zipapp to python.
+echo "Running zipapp using an automatic temp directory..."
+"$PYTHON" "$ZIPAPP"
+
+echo "Running zipapp with extract root set..."
+export RULES_PYTHON_EXTRACT_ROOT="${TEST_TMPDIR:-/tmp}/extract_root_test"
+"$PYTHON" "$ZIPAPP"
+
+# Verify that the directory was created
+if [[ ! -d "$RULES_PYTHON_EXTRACT_ROOT" ]]; then
+ echo "Error: Extract root directory $RULES_PYTHON_EXTRACT_ROOT was not created!"
+ exit 1
+fi
+
+echo "Running zipapp with extract root set a second time..."
"$PYTHON" "$ZIPAPP"