Allow pkg_resources' resource APIs to be used inside .par files. (#52)

* Allow pkg_resources' resource APIs to be used inside .par files.

Several limitations apply.

* Fix description of test case

* Add link to bug tracker
diff --git a/runtime/support.py b/runtime/support.py
index 0132e39..e411bd0 100644
--- a/runtime/support.py
+++ b/runtime/support.py
@@ -22,13 +22,33 @@
 3. Resources stored in a .par file may need to be exposed as OS-level
    files instead of Python File objects.
 
-We try a succession of different strategies until we find one that
-works.
+We hook into the pkg_resources module, if present, to achieve 2 and 3.
 
-TODO: Strategy: FUSE filesystem
-TODO: Strategy: dlopen_with_offset
-TODO: Strategy: extract all files to a temp dir
-TODO: Strategy: Do nothing if archive doesn't have any C extension modules
+Limitations:
+
+A. Retrieving resources from packages
+
+It should be possible to do this:
+    fn = pkg_resources.resource_filename('mypackage', 'myfile')
+But instead one must do
+    fn = pkg_resources.resource_filename(
+             pkg_resources.Requirement.parse.spec('mypackage'),
+             'myfile')
+
+B. Extraction dir
+
+You should explicitly set the default extraction directory, via
+`pkg_resources.set_extraction_path(my_directory)`, since the default
+is not safe.  For example:
+
+    tmpdir = tempfile.mkdtemp()
+    pkg_resources.set_extraction(tmpdir)
+
+You should arrange for that directory to be deleted at some point.
+Note that pkg_resources.cleanup_resources() is an unimplemented no-op,
+so use something else.  For example:
+
+    atexit.register(lambda: shutil.rmtree(tmpdir, ignore_errors=True))
 
 """
 
@@ -94,35 +114,69 @@
         Therefore this class exists.
         """
 
-        def _listdir(self, fspath):
-            """List of resource names in the directory (like ``os.listdir()``)
+        def _zipinfo_name(self, fspath):
+            """Overrides EggMetadata._zipinfo_name"""
+            # Convert a virtual filename (full path to file) into a
+            # zipfile subpath usable with the zipimport directory
+            # cache for our target archive
+            while fspath.endswith(os.sep):
+                fspath = fspath[:-1]
+            if fspath == self.loader.archive:
+                return ''
+            if fspath.startswith(self.zip_pre):
+                return fspath[len(self.zip_pre):]
+            raise AssertionError(
+                "%s is not a subpath of %s" % (fspath, self.zip_pre)
+            )
 
-            Overrides EggMetadata._listdir()
-            """
-
-            zipinfo_name = self._zipinfo_name(fspath)
-            while zipinfo_name.endswith('/'):
-                zipinfo_name = zipinfo_name[:-1]
-            result = self._index().get(zipinfo_name, ())
-            return list(result)
+        def _parts(self, zip_path):
+            """Overrides EggMetadata._parts"""
+            # Convert a zipfile subpath into an egg-relative path part
+            # list.
+            fspath = self.zip_pre + zip_path
+            if fspath == self.egg_root:
+                return []
+            if fspath.startswith(self.egg_root + os.sep):
+                return fspath[len(self.egg_root) + 1:].split(os.sep)
+            raise AssertionError(
+                "%s is not a subpath of %s" % (fspath, self.egg_root)
+            )
 
     def find_dist_info_in_zip(importer, path_item, only=False):
         """Find dist-info style metadata in zip files.
 
-        We ignore the `only` flag because it's not clear what it should
-        actually do in this case.
+        importer: PEP 302-style Importer object
+        path_item (str): filename or pseudo-filename like:
+            /usr/somedirs/main.par
+            or
+            /usr/somedirs/main.par/pypi__portpicker_1_2_0
+        only (bool): We ignore the `only` flag because it's not clear
+            what it should actually do in this case.
+
+        Yields pkg_resources.Distribution objects
         """
         metadata = DistInfoMetadata(importer)
         for subitem in metadata.resource_listdir('/'):
-            if subitem.lower().endswith('.dist-info'):
+            basename, ext = os.path.splitext(subitem)
+            if ext.lower() == '.dist-info':
+                # Parse distribution name
+                match = pkg_resources.EGG_NAME(basename)
+                project_name = 'unknown'
+                if match:
+                    project_name = match.group('name')
+                # Create metadata object
                 subpath = os.path.join(path_item, subitem)
-                submeta = pkg_resources.EggMetadata(
-                    zipimport.zipimporter(subpath))
+                submeta = DistInfoMetadata(
+                    zipimport.zipimporter(path_item))
+                # Override pkg_resources defaults to avoid
+                # "resource_filename() only supported for .egg, not
+                # .zip" message
+                submeta.egg_name = project_name
                 submeta.egg_info = subpath
+                submeta.egg_root = path_item
                 dist = pkg_resources.Distribution.from_location(
                     path_item, subitem, submeta)
                 yield dist
-        return
 
     def find_eggs_and_dist_info_in_zip(importer, path_item, only=False):
         """Chain together our finder and the standard pkg_resources finder
@@ -150,8 +204,9 @@
         importer = pkgutil.get_importer(entry)
         if isinstance(importer, zipimport.zipimporter):
             for dist in find_dist_info_in_zip(importer, entry, only=True):
-                pkg_resources.working_set.add(dist, entry, insert=False,
-                                              replace=False)
+                if isinstance(dist._provider, DistInfoMetadata):
+                    pkg_resources.working_set.add(dist, entry, insert=False,
+                                                  replace=True)
 
 
 def setup(import_roots=None):
diff --git a/runtime/support_test.py b/runtime/support_test.py
index 3ec1063..48f4ff7 100644
--- a/runtime/support_test.py
+++ b/runtime/support_test.py
@@ -42,15 +42,21 @@
         self.assertNotEqual(path, None)
 
     def test_setup(self):
+        old_sys_path = sys.path
+        mock_sys_path = list(sys.path)
+        sys.path = mock_sys_path
         # `import pip` can cause arbitrary sys.path changes,
         # especially if using the Debian `python-pip` package or
-        # similar.  Get that lunacy out of the way before starting
-        # test
+        # similar.  Do it first to get those changes out of the
+        # way.
         try:
             import pip  # noqa
         except ImportError:
             pass
+        finally:
+            sys.path = old_sys_path
 
+        # Run setup()
         old_sys_path = sys.path
         try:
             mock_sys_path = list(sys.path)
@@ -58,6 +64,8 @@
             support.setup(import_roots=['some_root', 'another_root'])
         finally:
             sys.path = old_sys_path
+
+        # Check results
         self.assertTrue(mock_sys_path[1].endswith('subpar/runtime/some_root'),
                         mock_sys_path)
         self.assertTrue(
diff --git a/tests/package_pkg_resources/main.py b/tests/package_pkg_resources/main.py
index dea801d..f7b9cdb 100644
--- a/tests/package_pkg_resources/main.py
+++ b/tests/package_pkg_resources/main.py
@@ -19,26 +19,95 @@
 """
 
 
+import atexit
+import os
+import shutil
+import tempfile
+
+
 def main():
     print('In pkg_resources test main()')
     try:
         import pkg_resources
+        import setuptools
+        # We can't query pkg_resources' version so query setuptools instead
+        version = pkg_resources.parse_version(setuptools.__version__)
+        minimum = pkg_resources.parse_version('36.6.0')
+        if (version < minimum):
+            print('Skipping test, pkg_resources module is too old')
+            return
     except ImportError:
         print('Skipping test, pkg_resources module is not available')
         return
 
     ws = pkg_resources.working_set
 
+    # Set a safe extraction dir (the default is unsafe)
+    extraction_tmpdir = tempfile.mkdtemp()
+    atexit.register(lambda: shutil.rmtree(
+        extraction_tmpdir, ignore_errors=True))
+    pkg_resources.set_extraction_path(extraction_tmpdir)
+
     # Informational for debugging
     distributions = list(ws)
-    print('Resources found: %s' % distributions)
+    assert distributions
 
-    # Check for the packages we provided metadata for.  There will
-    # also be metadata for whatever other packages happen to be
-    # installed in the current Python interpreter.
-    for spec in ['portpicker==1.2.0', 'yapf==0.19.0']:
-        dist = ws.find(pkg_resources.Requirement.parse(spec))
-        assert dist, (spec, distributions)
+    # Test package that doesn't exist.
+    # I hereby promise never to release a package with this name.
+    nonexistent_name = 'subpar-package-does-not-exist-blorg'
+    req = pkg_resources.Requirement.parse(nonexistent_name)
+    dist = ws.find(req)
+    assert not dist
+
+    # Package exists, has code at the top level directory
+    portpicker_spec = 'portpicker==1.2.0'
+    req = pkg_resources.Requirement.parse(portpicker_spec)
+    # Extract single file
+    fn = pkg_resources.resource_filename(req, 'data_file.txt')
+    with open(fn) as f:
+        actual = f.read()
+        assert actual == 'Dummy data file for portpicker\n', actual
+    # Extract all
+    dirname = pkg_resources.resource_filename(req, '')
+    expected = [
+        # The __init__.py file shouldn't be here, but is, as an
+        # unfortunately side effect of Bazel runfiles behavior.
+        # https://github.com/google/subpar/issues/51
+        '__init__.py',
+        'data_file.txt',
+        'portpicker-1.2.0.dist-info',
+        'portpicker.py',
+    ]
+    for fn in expected:
+        assert os.path.exists(os.path.join(dirname, fn)), fn
+    # Import module and check that we got the right one
+    module = __import__(req.name)
+    assert module.x == req.name, (module, vars(module))
+
+    # Package exists, has code in a subdir
+    yapf_spec = 'yapf==0.19.0'
+    req = pkg_resources.Requirement.parse(yapf_spec)
+    # Extract single file
+    fn = pkg_resources.resource_filename(req, 'data_file.txt')
+    with open(fn) as f:
+        actual = f.read()
+        assert actual == 'Dummy data file for yapf\n', actual
+    # Extract all
+    dirname = pkg_resources.resource_filename(req, '')
+    expected = [
+        # The __init__.py file shouldn't be here, but is, as an
+        # unfortunately side effect of Bazel runfiles behavior.
+        '__init__.py',
+        'data_file.txt',
+        'yapf',
+        'yapf-0.19.0.dist-info',
+    ]
+    for fn in expected:
+        assert os.path.exists(os.path.join(dirname, fn)), fn
+    # Import module and check that we got the right one
+    module = __import__(req.name)
+    assert module.x == req.name, (module, vars(module))
+    print("Pass")
 
 
 if __name__ == '__main__':
diff --git a/tests/package_pkg_resources/main_PY2_filelist.txt b/tests/package_pkg_resources/main_PY2_filelist.txt
index 7b93f00..07fffbd 100644
--- a/tests/package_pkg_resources/main_PY2_filelist.txt
+++ b/tests/package_pkg_resources/main_PY2_filelist.txt
@@ -1,8 +1,15 @@
+__init__.py
 __main__.py
+pypi__portpicker_1_2_0/__init__.py
+pypi__portpicker_1_2_0/data_file.txt
 pypi__portpicker_1_2_0/portpicker-1.2.0.dist-info/METADATA
 pypi__portpicker_1_2_0/portpicker-1.2.0.dist-info/metadata.json
+pypi__portpicker_1_2_0/portpicker.py
+pypi__yapf_0_19_0/__init__.py
+pypi__yapf_0_19_0/data_file.txt
 pypi__yapf_0_19_0/yapf-0.19.0.dist-info/METADATA
 pypi__yapf_0_19_0/yapf-0.19.0.dist-info/metadata.json
+pypi__yapf_0_19_0/yapf/__init__.py
 subpar/__init__.py
 subpar/runtime/__init__.py
 subpar/runtime/support.py
diff --git a/tests/package_pkg_resources/main_PY3_filelist.txt b/tests/package_pkg_resources/main_PY3_filelist.txt
index 7b93f00..07fffbd 100644
--- a/tests/package_pkg_resources/main_PY3_filelist.txt
+++ b/tests/package_pkg_resources/main_PY3_filelist.txt
@@ -1,8 +1,15 @@
+__init__.py
 __main__.py
+pypi__portpicker_1_2_0/__init__.py
+pypi__portpicker_1_2_0/data_file.txt
 pypi__portpicker_1_2_0/portpicker-1.2.0.dist-info/METADATA
 pypi__portpicker_1_2_0/portpicker-1.2.0.dist-info/metadata.json
+pypi__portpicker_1_2_0/portpicker.py
+pypi__yapf_0_19_0/__init__.py
+pypi__yapf_0_19_0/data_file.txt
 pypi__yapf_0_19_0/yapf-0.19.0.dist-info/METADATA
 pypi__yapf_0_19_0/yapf-0.19.0.dist-info/metadata.json
+pypi__yapf_0_19_0/yapf/__init__.py
 subpar/__init__.py
 subpar/runtime/__init__.py
 subpar/runtime/support.py
diff --git a/third_party/pypi__portpicker_1_2_0/BUILD b/third_party/pypi__portpicker_1_2_0/BUILD
index 6cff5d5..bee0da1 100644
--- a/third_party/pypi__portpicker_1_2_0/BUILD
+++ b/third_party/pypi__portpicker_1_2_0/BUILD
@@ -5,6 +5,9 @@
 py_library(
     name = "files",
     srcs = [],
-    data = glob(["portpicker-1.2.0.dist-info/**"]),
+    data = [
+        "portpicker.py",
+        "data_file.txt",
+    ] + glob(["portpicker-1.2.0.dist-info/**"]),
     imports = ["."],
 )
diff --git a/third_party/pypi__portpicker_1_2_0/data_file.txt b/third_party/pypi__portpicker_1_2_0/data_file.txt
new file mode 100644
index 0000000..66630bd
--- /dev/null
+++ b/third_party/pypi__portpicker_1_2_0/data_file.txt
@@ -0,0 +1 @@
+Dummy data file for portpicker
diff --git a/third_party/pypi__portpicker_1_2_0/portpicker.py b/third_party/pypi__portpicker_1_2_0/portpicker.py
new file mode 100644
index 0000000..11bc3c4
--- /dev/null
+++ b/third_party/pypi__portpicker_1_2_0/portpicker.py
@@ -0,0 +1,3 @@
+# Dummy source file for testing
+
+x = 'portpicker'
diff --git a/third_party/pypi__yapf_0_19_0/BUILD b/third_party/pypi__yapf_0_19_0/BUILD
index cd12f69..274ddd3 100644
--- a/third_party/pypi__yapf_0_19_0/BUILD
+++ b/third_party/pypi__yapf_0_19_0/BUILD
@@ -5,6 +5,9 @@
 py_library(
     name = "files",
     srcs = [],
-    data = glob(["yapf-0.19.0.dist-info/**"]),
+    data = [
+        "yapf/__init__.py",
+        "data_file.txt",
+    ] + glob(["yapf-0.19.0.dist-info/**"]),
     imports = ["."],
 )
diff --git a/third_party/pypi__yapf_0_19_0/data_file.txt b/third_party/pypi__yapf_0_19_0/data_file.txt
new file mode 100644
index 0000000..c2de30b
--- /dev/null
+++ b/third_party/pypi__yapf_0_19_0/data_file.txt
@@ -0,0 +1 @@
+Dummy data file for yapf
diff --git a/third_party/pypi__yapf_0_19_0/yapf/__init__.py b/third_party/pypi__yapf_0_19_0/yapf/__init__.py
new file mode 100644
index 0000000..75ad8c5
--- /dev/null
+++ b/third_party/pypi__yapf_0_19_0/yapf/__init__.py
@@ -0,0 +1,3 @@
+# Dummy source file for testing
+
+x = 'yapf'