Allow pkg_resources' resource APIs to be used inside .par files. (#52)
* Allow pkg_resources' resource APIs to be used inside .par files.
Several limitations apply.
* Fix description of test case
* Add link to bug tracker
diff --git a/runtime/support.py b/runtime/support.py
index 0132e39..e411bd0 100644
--- a/runtime/support.py
+++ b/runtime/support.py
@@ -22,13 +22,33 @@
3. Resources stored in a .par file may need to be exposed as OS-level
files instead of Python File objects.
-We try a succession of different strategies until we find one that
-works.
+We hook into the pkg_resources module, if present, to achieve 2 and 3.
-TODO: Strategy: FUSE filesystem
-TODO: Strategy: dlopen_with_offset
-TODO: Strategy: extract all files to a temp dir
-TODO: Strategy: Do nothing if archive doesn't have any C extension modules
+Limitations:
+
+A. Retrieving resources from packages
+
+It should be possible to do this:
+ fn = pkg_resources.resource_filename('mypackage', 'myfile')
+But instead one must do
+ fn = pkg_resources.resource_filename(
+ pkg_resources.Requirement.parse.spec('mypackage'),
+ 'myfile')
+
+B. Extraction dir
+
+You should explicitly set the default extraction directory, via
+`pkg_resources.set_extraction_path(my_directory)`, since the default
+is not safe. For example:
+
+ tmpdir = tempfile.mkdtemp()
+ pkg_resources.set_extraction(tmpdir)
+
+You should arrange for that directory to be deleted at some point.
+Note that pkg_resources.cleanup_resources() is an unimplemented no-op,
+so use something else. For example:
+
+ atexit.register(lambda: shutil.rmtree(tmpdir, ignore_errors=True))
"""
@@ -94,35 +114,69 @@
Therefore this class exists.
"""
- def _listdir(self, fspath):
- """List of resource names in the directory (like ``os.listdir()``)
+ def _zipinfo_name(self, fspath):
+ """Overrides EggMetadata._zipinfo_name"""
+ # Convert a virtual filename (full path to file) into a
+ # zipfile subpath usable with the zipimport directory
+ # cache for our target archive
+ while fspath.endswith(os.sep):
+ fspath = fspath[:-1]
+ if fspath == self.loader.archive:
+ return ''
+ if fspath.startswith(self.zip_pre):
+ return fspath[len(self.zip_pre):]
+ raise AssertionError(
+ "%s is not a subpath of %s" % (fspath, self.zip_pre)
+ )
- Overrides EggMetadata._listdir()
- """
-
- zipinfo_name = self._zipinfo_name(fspath)
- while zipinfo_name.endswith('/'):
- zipinfo_name = zipinfo_name[:-1]
- result = self._index().get(zipinfo_name, ())
- return list(result)
+ def _parts(self, zip_path):
+ """Overrides EggMetadata._parts"""
+ # Convert a zipfile subpath into an egg-relative path part
+ # list.
+ fspath = self.zip_pre + zip_path
+ if fspath == self.egg_root:
+ return []
+ if fspath.startswith(self.egg_root + os.sep):
+ return fspath[len(self.egg_root) + 1:].split(os.sep)
+ raise AssertionError(
+ "%s is not a subpath of %s" % (fspath, self.egg_root)
+ )
def find_dist_info_in_zip(importer, path_item, only=False):
"""Find dist-info style metadata in zip files.
- We ignore the `only` flag because it's not clear what it should
- actually do in this case.
+ importer: PEP 302-style Importer object
+ path_item (str): filename or pseudo-filename like:
+ /usr/somedirs/main.par
+ or
+ /usr/somedirs/main.par/pypi__portpicker_1_2_0
+ only (bool): We ignore the `only` flag because it's not clear
+ what it should actually do in this case.
+
+ Yields pkg_resources.Distribution objects
"""
metadata = DistInfoMetadata(importer)
for subitem in metadata.resource_listdir('/'):
- if subitem.lower().endswith('.dist-info'):
+ basename, ext = os.path.splitext(subitem)
+ if ext.lower() == '.dist-info':
+ # Parse distribution name
+ match = pkg_resources.EGG_NAME(basename)
+ project_name = 'unknown'
+ if match:
+ project_name = match.group('name')
+ # Create metadata object
subpath = os.path.join(path_item, subitem)
- submeta = pkg_resources.EggMetadata(
- zipimport.zipimporter(subpath))
+ submeta = DistInfoMetadata(
+ zipimport.zipimporter(path_item))
+ # Override pkg_resources defaults to avoid
+ # "resource_filename() only supported for .egg, not
+ # .zip" message
+ submeta.egg_name = project_name
submeta.egg_info = subpath
+ submeta.egg_root = path_item
dist = pkg_resources.Distribution.from_location(
path_item, subitem, submeta)
yield dist
- return
def find_eggs_and_dist_info_in_zip(importer, path_item, only=False):
"""Chain together our finder and the standard pkg_resources finder
@@ -150,8 +204,9 @@
importer = pkgutil.get_importer(entry)
if isinstance(importer, zipimport.zipimporter):
for dist in find_dist_info_in_zip(importer, entry, only=True):
- pkg_resources.working_set.add(dist, entry, insert=False,
- replace=False)
+ if isinstance(dist._provider, DistInfoMetadata):
+ pkg_resources.working_set.add(dist, entry, insert=False,
+ replace=True)
def setup(import_roots=None):
diff --git a/runtime/support_test.py b/runtime/support_test.py
index 3ec1063..48f4ff7 100644
--- a/runtime/support_test.py
+++ b/runtime/support_test.py
@@ -42,15 +42,21 @@
self.assertNotEqual(path, None)
def test_setup(self):
+ old_sys_path = sys.path
+ mock_sys_path = list(sys.path)
+ sys.path = mock_sys_path
# `import pip` can cause arbitrary sys.path changes,
# especially if using the Debian `python-pip` package or
- # similar. Get that lunacy out of the way before starting
- # test
+ # similar. Do it first to get those changes out of the
+ # way.
try:
import pip # noqa
except ImportError:
pass
+ finally:
+ sys.path = old_sys_path
+ # Run setup()
old_sys_path = sys.path
try:
mock_sys_path = list(sys.path)
@@ -58,6 +64,8 @@
support.setup(import_roots=['some_root', 'another_root'])
finally:
sys.path = old_sys_path
+
+ # Check results
self.assertTrue(mock_sys_path[1].endswith('subpar/runtime/some_root'),
mock_sys_path)
self.assertTrue(
diff --git a/tests/package_pkg_resources/main.py b/tests/package_pkg_resources/main.py
index dea801d..f7b9cdb 100644
--- a/tests/package_pkg_resources/main.py
+++ b/tests/package_pkg_resources/main.py
@@ -19,26 +19,95 @@
"""
+import atexit
+import os
+import shutil
+import tempfile
+
+
def main():
print('In pkg_resources test main()')
try:
import pkg_resources
+ import setuptools
+ # We can't query pkg_resources' version so query setuptools instead
+ version = pkg_resources.parse_version(setuptools.__version__)
+ minimum = pkg_resources.parse_version('36.6.0')
+ if (version < minimum):
+ print('Skipping test, pkg_resources module is too old')
+ return
except ImportError:
print('Skipping test, pkg_resources module is not available')
return
ws = pkg_resources.working_set
+ # Set a safe extraction dir (the default is unsafe)
+ extraction_tmpdir = tempfile.mkdtemp()
+ atexit.register(lambda: shutil.rmtree(
+ extraction_tmpdir, ignore_errors=True))
+ pkg_resources.set_extraction_path(extraction_tmpdir)
+
# Informational for debugging
distributions = list(ws)
- print('Resources found: %s' % distributions)
+ assert distributions
- # Check for the packages we provided metadata for. There will
- # also be metadata for whatever other packages happen to be
- # installed in the current Python interpreter.
- for spec in ['portpicker==1.2.0', 'yapf==0.19.0']:
- dist = ws.find(pkg_resources.Requirement.parse(spec))
- assert dist, (spec, distributions)
+ # Test package that doesn't exist.
+ # I hereby promise never to release a package with this name.
+ nonexistent_name = 'subpar-package-does-not-exist-blorg'
+ req = pkg_resources.Requirement.parse(nonexistent_name)
+ dist = ws.find(req)
+ assert not dist
+
+ # Package exists, has code at the top level directory
+ portpicker_spec = 'portpicker==1.2.0'
+ req = pkg_resources.Requirement.parse(portpicker_spec)
+ # Extract single file
+ fn = pkg_resources.resource_filename(req, 'data_file.txt')
+ with open(fn) as f:
+ actual = f.read()
+ assert actual == 'Dummy data file for portpicker\n', actual
+ # Extract all
+ dirname = pkg_resources.resource_filename(req, '')
+ expected = [
+ # The __init__.py file shouldn't be here, but is, as an
+ # unfortunately side effect of Bazel runfiles behavior.
+ # https://github.com/google/subpar/issues/51
+ '__init__.py',
+ 'data_file.txt',
+ 'portpicker-1.2.0.dist-info',
+ 'portpicker.py',
+ ]
+ for fn in expected:
+ assert os.path.exists(os.path.join(dirname, fn)), fn
+ # Import module and check that we got the right one
+ module = __import__(req.name)
+ assert module.x == req.name, (module, vars(module))
+
+ # Package exists, has code in a subdir
+ yapf_spec = 'yapf==0.19.0'
+ req = pkg_resources.Requirement.parse(yapf_spec)
+ # Extract single file
+ fn = pkg_resources.resource_filename(req, 'data_file.txt')
+ with open(fn) as f:
+ actual = f.read()
+ assert actual == 'Dummy data file for yapf\n', actual
+ # Extract all
+ dirname = pkg_resources.resource_filename(req, '')
+ expected = [
+ # The __init__.py file shouldn't be here, but is, as an
+ # unfortunately side effect of Bazel runfiles behavior.
+ '__init__.py',
+ 'data_file.txt',
+ 'yapf',
+ 'yapf-0.19.0.dist-info',
+ ]
+ for fn in expected:
+ assert os.path.exists(os.path.join(dirname, fn)), fn
+ # Import module and check that we got the right one
+ module = __import__(req.name)
+ assert module.x == req.name, (module, vars(module))
+ print("Pass")
if __name__ == '__main__':
diff --git a/tests/package_pkg_resources/main_PY2_filelist.txt b/tests/package_pkg_resources/main_PY2_filelist.txt
index 7b93f00..07fffbd 100644
--- a/tests/package_pkg_resources/main_PY2_filelist.txt
+++ b/tests/package_pkg_resources/main_PY2_filelist.txt
@@ -1,8 +1,15 @@
+__init__.py
__main__.py
+pypi__portpicker_1_2_0/__init__.py
+pypi__portpicker_1_2_0/data_file.txt
pypi__portpicker_1_2_0/portpicker-1.2.0.dist-info/METADATA
pypi__portpicker_1_2_0/portpicker-1.2.0.dist-info/metadata.json
+pypi__portpicker_1_2_0/portpicker.py
+pypi__yapf_0_19_0/__init__.py
+pypi__yapf_0_19_0/data_file.txt
pypi__yapf_0_19_0/yapf-0.19.0.dist-info/METADATA
pypi__yapf_0_19_0/yapf-0.19.0.dist-info/metadata.json
+pypi__yapf_0_19_0/yapf/__init__.py
subpar/__init__.py
subpar/runtime/__init__.py
subpar/runtime/support.py
diff --git a/tests/package_pkg_resources/main_PY3_filelist.txt b/tests/package_pkg_resources/main_PY3_filelist.txt
index 7b93f00..07fffbd 100644
--- a/tests/package_pkg_resources/main_PY3_filelist.txt
+++ b/tests/package_pkg_resources/main_PY3_filelist.txt
@@ -1,8 +1,15 @@
+__init__.py
__main__.py
+pypi__portpicker_1_2_0/__init__.py
+pypi__portpicker_1_2_0/data_file.txt
pypi__portpicker_1_2_0/portpicker-1.2.0.dist-info/METADATA
pypi__portpicker_1_2_0/portpicker-1.2.0.dist-info/metadata.json
+pypi__portpicker_1_2_0/portpicker.py
+pypi__yapf_0_19_0/__init__.py
+pypi__yapf_0_19_0/data_file.txt
pypi__yapf_0_19_0/yapf-0.19.0.dist-info/METADATA
pypi__yapf_0_19_0/yapf-0.19.0.dist-info/metadata.json
+pypi__yapf_0_19_0/yapf/__init__.py
subpar/__init__.py
subpar/runtime/__init__.py
subpar/runtime/support.py
diff --git a/third_party/pypi__portpicker_1_2_0/BUILD b/third_party/pypi__portpicker_1_2_0/BUILD
index 6cff5d5..bee0da1 100644
--- a/third_party/pypi__portpicker_1_2_0/BUILD
+++ b/third_party/pypi__portpicker_1_2_0/BUILD
@@ -5,6 +5,9 @@
py_library(
name = "files",
srcs = [],
- data = glob(["portpicker-1.2.0.dist-info/**"]),
+ data = [
+ "portpicker.py",
+ "data_file.txt",
+ ] + glob(["portpicker-1.2.0.dist-info/**"]),
imports = ["."],
)
diff --git a/third_party/pypi__portpicker_1_2_0/data_file.txt b/third_party/pypi__portpicker_1_2_0/data_file.txt
new file mode 100644
index 0000000..66630bd
--- /dev/null
+++ b/third_party/pypi__portpicker_1_2_0/data_file.txt
@@ -0,0 +1 @@
+Dummy data file for portpicker
diff --git a/third_party/pypi__portpicker_1_2_0/portpicker.py b/third_party/pypi__portpicker_1_2_0/portpicker.py
new file mode 100644
index 0000000..11bc3c4
--- /dev/null
+++ b/third_party/pypi__portpicker_1_2_0/portpicker.py
@@ -0,0 +1,3 @@
+# Dummy source file for testing
+
+x = 'portpicker'
diff --git a/third_party/pypi__yapf_0_19_0/BUILD b/third_party/pypi__yapf_0_19_0/BUILD
index cd12f69..274ddd3 100644
--- a/third_party/pypi__yapf_0_19_0/BUILD
+++ b/third_party/pypi__yapf_0_19_0/BUILD
@@ -5,6 +5,9 @@
py_library(
name = "files",
srcs = [],
- data = glob(["yapf-0.19.0.dist-info/**"]),
+ data = [
+ "yapf/__init__.py",
+ "data_file.txt",
+ ] + glob(["yapf-0.19.0.dist-info/**"]),
imports = ["."],
)
diff --git a/third_party/pypi__yapf_0_19_0/data_file.txt b/third_party/pypi__yapf_0_19_0/data_file.txt
new file mode 100644
index 0000000..c2de30b
--- /dev/null
+++ b/third_party/pypi__yapf_0_19_0/data_file.txt
@@ -0,0 +1 @@
+Dummy data file for yapf
diff --git a/third_party/pypi__yapf_0_19_0/yapf/__init__.py b/third_party/pypi__yapf_0_19_0/yapf/__init__.py
new file mode 100644
index 0000000..75ad8c5
--- /dev/null
+++ b/third_party/pypi__yapf_0_19_0/yapf/__init__.py
@@ -0,0 +1,3 @@
+# Dummy source file for testing
+
+x = 'yapf'