Fix incompatibility with Python Toolchains (#99)

* Make remove_if_present atomic

* Fix typo

* Pass import roots directly from Bazel to compiler via args

* Replace default python toolchain wrappers with /usr/bin/env python[23]

* Replace Skylark with Starlark

* Update tests for CLI

* Update CLI args for naming consistency

* Fail when python interpreter is a relative path and improve comments
diff --git a/compiler/cli.py b/compiler/cli.py
index 7a34b4e..d1fcb59 100644
--- a/compiler/cli.py
+++ b/compiler/cli.py
@@ -53,7 +53,7 @@
         help='Root directory of all relative paths in manifest file.',
         default=os.getcwd())
     parser.add_argument(
-        '--outputpar',
+        '--output_par',
         help='Filename of generated par file.',
         required=True)
     parser.add_argument(
@@ -69,7 +69,7 @@
     # "Seconds since Unix epoch" was chosen to be compatible with
     # the SOURCE_DATE_EPOCH standard
     #
-    # Numeric calue is from running this:
+    # Numeric value is from running this:
     #   "date --date='Jan 1 1980 00:00:00 utc' --utc +%s"
     parser.add_argument(
         '--timestamp',
@@ -87,50 +87,67 @@
         'directory at the start of execution.',
         type=bool_from_string,
         required=True)
+    parser.add_argument(
+        '--import_root',
+        help='Path to add to sys.path, may be repeated to provide multiple roots.',
+        action='append',
+        default=[],
+        dest='import_roots')
     return parser
 
 
 def parse_stub(stub_filename):
-    """Parse the imports and interpreter path from a py_binary() stub.
+    """Parse interpreter path from a py_binary() stub.
 
     We assume the stub is utf-8 encoded.
 
-    TODO(b/29227737): Remove this once we can access imports from skylark.
+    TODO(bazelbuild/bazel#7805): Remove this once we can access the py_runtime from Starlark.
 
-    Returns (list of relative paths, path to Python interpreter)
+    Returns path to Python interpreter
     """
 
-    # Find the list of import roots
-    imports_regex = re.compile(r'''^  python_imports = '([^']*)'$''')
+    # Find the interpreter
     interpreter_regex = re.compile(r'''^PYTHON_BINARY = '([^']*)'$''')
-    import_roots = None
     interpreter = None
     with io.open(stub_filename, 'rt', encoding='utf8') as stub_file:
         for line in stub_file:
-            importers_match = imports_regex.match(line)
-            if importers_match:
-                import_roots = importers_match.group(1).split(':')
-                # Filter out empty paths
-                import_roots = [x for x in import_roots if x]
             interpreter_match = interpreter_regex.match(line)
             if interpreter_match:
                 interpreter = interpreter_match.group(1)
-    if import_roots is None or not interpreter:
+    if not interpreter:
         raise error.Error('Failed to parse stub file [%s]' % stub_filename)
 
-    # Find the Python interpreter, matching the search logic in
-    # stub_template.txt
+    # Determine the Python interpreter, checking for default toolchain.
+    #
+    # This somewhat mirrors the logic in python_stub_template.txt, but we don't support
+    # relative paths (i.e., in-workspace interpreters). This is because the interpreter
+    # will be used in the .par file's shebang, and putting a relative path in a shebang
+    # is extremely brittle and non-relocatable. (The reason the standard py_binary rule
+    # can use an in-workspace interpreter is that its stub script runs in a separate
+    # process and has a shebang referencing the system interpreter). As a special case,
+    # if the Python target is using the autodetecting Python toolchain, which is
+    # technically an in-workspace runtime, we rewrite it to "/usr/bin/env python[2|3]"
+    # rather than fail.
     if interpreter.startswith('//'):
         raise error.Error('Python interpreter must not be a label [%s]' %
                           stub_filename)
     elif interpreter.startswith('/'):
         pass
+    elif interpreter == 'bazel_tools/tools/python/py3wrapper.sh': # Default toolchain
+        # Replace default toolchain python3 wrapper with default python3 on path
+        interpreter = '/usr/bin/env python3'
+    elif interpreter == 'bazel_tools/tools/python/py2wrapper.sh': # Default toolchain
+        # Replace default toolchain python2 wrapper with default python2 on path
+        interpreter = '/usr/bin/env python2'
     elif '/' in interpreter:
-        pass
+        raise error.Error(
+            'par files require a Python runtime that is ' +
+            'installed on the system, not defined inside the workspace. Use ' +
+            'a `py_runtime` with an absolute path, not a label.')
     else:
         interpreter = '/usr/bin/env %s' % interpreter
 
-    return (import_roots, interpreter)
+    return interpreter
 
 
 def main(argv):
@@ -138,17 +155,17 @@
     parser = make_command_line_parser()
     args = parser.parse_args(argv[1:])
 
-    # Parse information from stub file that's too hard to compute in Skylark
-    import_roots, interpreter = parse_stub(args.stub_file)
+    # Parse interpreter from stub file that's not available in Starlark
+    interpreter = parse_stub(args.stub_file)
 
     if args.interpreter:
         interpreter = args.interpreter
 
     par = python_archive.PythonArchive(
         main_filename=args.main_filename,
-        import_roots=import_roots,
+        import_roots=args.import_roots,
         interpreter=interpreter,
-        output_filename=args.outputpar,
+        output_filename=args.output_par,
         manifest_filename=args.manifest_file,
         manifest_root=args.manifest_root,
         timestamp=args.timestamp,
diff --git a/compiler/cli_test.py b/compiler/cli_test.py
index a30c067..4bdaf34 100644
--- a/compiler/cli_test.py
+++ b/compiler/cli_test.py
@@ -35,19 +35,27 @@
         args = parser.parse_args([
             '--manifest_file=bar',
             '--manifest_root=bazz',
-            '--outputpar=baz',
+            '--output_par=baz',
             '--stub_file=quux',
             '--zip_safe=False',
+            '--import_root=root1',
+            '--import_root=root2',
             'foo',
         ])
         self.assertEqual(args.manifest_file, 'bar')
+        self.assertEqual(args.manifest_root, 'bazz')
+        self.assertEqual(args.output_par, 'baz')
+        self.assertEqual(args.stub_file, 'quux')
+        self.assertEqual(args.zip_safe, False)
+        self.assertEqual(args.import_roots, ['root1', 'root2'])
+        self.assertEqual(args.main_filename, 'foo')
 
     def test_make_command_line_parser_for_interprerter(self):
         parser = cli.make_command_line_parser()
         args = parser.parse_args([
             '--manifest_file=bar',
             '--manifest_root=bazz',
-            '--outputpar=baz',
+            '--output_par=baz',
             '--stub_file=quux',
             '--zip_safe=False',
             '--interpreter=foobar',
@@ -57,36 +65,26 @@
 
     def test_stub(self):
         valid_cases = [
-            # Empty list
+            # Absolute path to interpreter
             [b"""
-  python_imports = ''
 PYTHON_BINARY = '/usr/bin/python'
 """,
-             ([], '/usr/bin/python')],
-            # Single import
-            [b"""
-  python_imports = 'myworkspace/spam/eggs'
-PYTHON_BINARY = '/usr/bin/python'
-""",
-             (['myworkspace/spam/eggs'], '/usr/bin/python')],
-            # Multiple imports
-            [b"""
-  python_imports = 'myworkspace/spam/eggs:otherworkspace'
-PYTHON_BINARY = '/usr/bin/python'
-""",
-             (['myworkspace/spam/eggs', 'otherworkspace'], '/usr/bin/python')],
-            # Relative path to interpreter
-            [b"""
-  python_imports = ''
-PYTHON_BINARY = 'mydir/python'
-""",
-             ([], 'mydir/python')],
+             '/usr/bin/python'],
             # Search for interpreter on $PATH
             [b"""
-  python_imports = ''
 PYTHON_BINARY = 'python'
 """,
-             ([], '/usr/bin/env python')],
+             '/usr/bin/env python'],
+            # Default toolchain wrapped python3 interpreter
+            [b"""
+PYTHON_BINARY = 'bazel_tools/tools/python/py3wrapper.sh'
+""",
+             '/usr/bin/env python3'],
+            # Default toolchain wrapped python2 interpreter
+            [b"""
+PYTHON_BINARY = 'bazel_tools/tools/python/py2wrapper.sh'
+""",
+             '/usr/bin/env python2'],
         ]
         for content, expected in valid_cases:
             with test_utils.temp_file(content) as stub_file:
@@ -94,15 +92,13 @@
                 self.assertEqual(actual, expected)
 
         invalid_cases = [
+            # No interpreter
             b'',
             b'\n\n',
-            # No interpreter
-            b"  python_imports = 'myworkspace/spam/eggs'",
-            # No imports
-            b"PYTHON_BINARY = 'python'\n",
+            # Relative interpreter path
+            b"PYTHON_BINARY = 'mydir/python'",
             # Interpreter is label
             b"""
-  python_imports = ''
 PYTHON_BINARY = '//mypackage:python'
 """,
             ]
diff --git a/compiler/python_archive.py b/compiler/python_archive.py
index fc810c6..2c69c58 100755
--- a/compiler/python_archive.py
+++ b/compiler/python_archive.py
@@ -30,6 +30,7 @@
 
 from datetime import datetime
 import contextlib
+import errno
 import io
 import logging
 import os
@@ -290,9 +291,14 @@
 
 
 def remove_if_present(filename):
-    """Delete any existing file"""
-    if os.path.exists(filename):
+    """Delete a file if it exists"""
+    try:
+        # Remove atomically
         os.remove(filename)
+    except OSError as exc:
+        # Ignore failure if file does not exist
+        if exc.errno != errno.ENOENT:
+            raise
 
 
 def fetch_support_file(name, timestamp_tuple):
diff --git a/subpar.bzl b/subpar.bzl
index 1b70b5d..ae685bf 100644
--- a/subpar.bzl
+++ b/subpar.bzl
@@ -64,8 +64,7 @@
     )
 
     # Find the list of directories to add to sys.path
-    # TODO(b/29227737): Use 'imports' provider from Bazel
-    stub_file = ctx.attr.src.files_to_run.executable.path
+    import_roots = ctx.attr.src[PyInfo].imports.to_list()
 
     # Inputs to the action, but don't actually get stored in the .par file
     extra_inputs = [
@@ -79,14 +78,18 @@
     args = ctx.attr.compiler_args + [
         "--manifest_file",
         sources_file.path,
-        "--outputpar",
+        "--output_par",
         ctx.outputs.executable.path,
         "--stub_file",
-        stub_file,
+        ctx.attr.src.files_to_run.executable.path,
         "--zip_safe",
         str(zip_safe),
-        main_py_file.path,
     ]
+    for import_root in import_roots:
+        args.extend(['--import_root', import_root])
+    args.append(main_py_file.path)
+
+    # Run compiler
     ctx.actions.run(
         inputs = inputs + extra_inputs,
         tools = [ctx.attr.src.files_to_run.executable],