Lazily import parallelized format modules

`black.reformat_many` depends on a lot of slow-to-import modules. When
formatting simply a single file, the time paid to import those modules
is totally wasted. So I moved `black.reformat_many` and its helpers
to `black.concurrency` which is now *only* imported if there's more
than one file to reformat. This way, running Black over a single file
is snappier

Here are the numbers before and after this patch running `python -m
black --version`:

- interpreted: 411 ms +- 9 ms -> 342 ms +- 7 ms: 1.20x faster
- compiled: 365 ms +- 15 ms -> 304 ms +- 7 ms: 1.20x faster

Co-authored-by: Fabio Zadrozny <fabiofz@gmail.com>
diff --git a/CHANGES.md b/CHANGES.md
index 5b29f20..f2314c1 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -75,6 +75,8 @@
 
 <!-- Changes that improve Black's performance. -->
 
+- Reduce Black's startup time when formatting a single file by 15-30% (#3208)
+
 ## 22.6.0
 
 ### Style
diff --git a/docs/contributing/reference/reference_functions.rst b/docs/contributing/reference/reference_functions.rst
index 01ffe44..50eaeb3 100644
--- a/docs/contributing/reference/reference_functions.rst
+++ b/docs/contributing/reference/reference_functions.rst
@@ -52,7 +52,7 @@
 
 .. autofunction:: black.reformat_one
 
-.. autofunction:: black.schedule_formatting
+.. autofunction:: black.concurrency.schedule_formatting
 
 File operations
 ---------------
@@ -173,7 +173,7 @@
 
 .. autofunction:: black.linegen.should_split_line
 
-.. autofunction:: black.shutdown
+.. autofunction:: black.concurrency.shutdown
 
 .. autofunction:: black.strings.sub_twice
 
diff --git a/src/black/__init__.py b/src/black/__init__.py
index 2a5c750..6de5ff0 100644
--- a/src/black/__init__.py
+++ b/src/black/__init__.py
@@ -1,10 +1,8 @@
-import asyncio
 import io
 import json
 import os
 import platform
 import re
-import signal
 import sys
 import tokenize
 import traceback
@@ -13,10 +11,8 @@
 from datetime import datetime
 from enum import Enum
 from json.decoder import JSONDecodeError
-from multiprocessing import Manager, freeze_support
 from pathlib import Path
 from typing import (
-    TYPE_CHECKING,
     Any,
     Dict,
     Generator,
@@ -32,15 +28,19 @@
     Union,
 )
 
+if sys.version_info >= (3, 8):
+    from typing import Final
+else:
+    from typing_extensions import Final
+
 import click
 from click.core import ParameterSource
 from mypy_extensions import mypyc_attr
 from pathspec.patterns.gitwildmatch import GitWildMatchPatternError
 
 from _black_version import version as __version__
-from black.cache import Cache, filter_cached, get_cache_info, read_cache, write_cache
+from black.cache import Cache, get_cache_info, read_cache, write_cache
 from black.comments import normalize_fmt_off
-from black.concurrency import cancel, maybe_install_uvloop, shutdown
 from black.const import (
     DEFAULT_EXCLUDES,
     DEFAULT_INCLUDES,
@@ -90,10 +90,10 @@
 from blib2to3.pgen2 import token
 from blib2to3.pytree import Leaf, Node
 
-if TYPE_CHECKING:
-    from concurrent.futures import Executor
-
 COMPILED = Path(__file__).suffix in (".pyd", ".so")
+DEFAULT_WORKERS: Final = os.cpu_count()
+# Used to know whether uvloop should be installed.
+__BLACK_MAIN_CALLED__ = False
 
 # types
 FileContent = str
@@ -124,8 +124,6 @@
 # Legacy name, left for integrations.
 FileMode = Mode
 
-DEFAULT_WORKERS = os.cpu_count()
-
 
 def read_pyproject_toml(
     ctx: click.Context, param: click.Parameter, value: Optional[str]
@@ -587,6 +585,8 @@
                 report=report,
             )
         else:
+            from black.concurrency import reformat_many
+
             reformat_many(
                 sources=sources,
                 fast=fast,
@@ -771,132 +771,6 @@
         report.failed(src, str(exc))
 
 
-# diff-shades depends on being to monkeypatch this function to operate. I know it's
-# not ideal, but this shouldn't cause any issues ... hopefully. ~ichard26
-@mypyc_attr(patchable=True)
-def reformat_many(
-    sources: Set[Path],
-    fast: bool,
-    write_back: WriteBack,
-    mode: Mode,
-    report: "Report",
-    workers: Optional[int],
-) -> None:
-    """Reformat multiple files using a ProcessPoolExecutor."""
-    from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor
-
-    executor: Executor
-    worker_count = workers if workers is not None else DEFAULT_WORKERS
-    if sys.platform == "win32":
-        # Work around https://bugs.python.org/issue26903
-        assert worker_count is not None
-        worker_count = min(worker_count, 60)
-    try:
-        executor = ProcessPoolExecutor(max_workers=worker_count)
-    except (ImportError, NotImplementedError, OSError):
-        # we arrive here if the underlying system does not support multi-processing
-        # like in AWS Lambda or Termux, in which case we gracefully fallback to
-        # a ThreadPoolExecutor with just a single worker (more workers would not do us
-        # any good due to the Global Interpreter Lock)
-        executor = ThreadPoolExecutor(max_workers=1)
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    try:
-        loop.run_until_complete(
-            schedule_formatting(
-                sources=sources,
-                fast=fast,
-                write_back=write_back,
-                mode=mode,
-                report=report,
-                loop=loop,
-                executor=executor,
-            )
-        )
-    finally:
-        try:
-            shutdown(loop)
-        finally:
-            asyncio.set_event_loop(None)
-        if executor is not None:
-            executor.shutdown()
-
-
-async def schedule_formatting(
-    sources: Set[Path],
-    fast: bool,
-    write_back: WriteBack,
-    mode: Mode,
-    report: "Report",
-    loop: asyncio.AbstractEventLoop,
-    executor: "Executor",
-) -> None:
-    """Run formatting of `sources` in parallel using the provided `executor`.
-
-    (Use ProcessPoolExecutors for actual parallelism.)
-
-    `write_back`, `fast`, and `mode` options are passed to
-    :func:`format_file_in_place`.
-    """
-    cache: Cache = {}
-    if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
-        cache = read_cache(mode)
-        sources, cached = filter_cached(cache, sources)
-        for src in sorted(cached):
-            report.done(src, Changed.CACHED)
-    if not sources:
-        return
-
-    cancelled = []
-    sources_to_cache = []
-    lock = None
-    if write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
-        # For diff output, we need locks to ensure we don't interleave output
-        # from different processes.
-        manager = Manager()
-        lock = manager.Lock()
-    tasks = {
-        asyncio.ensure_future(
-            loop.run_in_executor(
-                executor, format_file_in_place, src, fast, mode, write_back, lock
-            )
-        ): src
-        for src in sorted(sources)
-    }
-    pending = tasks.keys()
-    try:
-        loop.add_signal_handler(signal.SIGINT, cancel, pending)
-        loop.add_signal_handler(signal.SIGTERM, cancel, pending)
-    except NotImplementedError:
-        # There are no good alternatives for these on Windows.
-        pass
-    while pending:
-        done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
-        for task in done:
-            src = tasks.pop(task)
-            if task.cancelled():
-                cancelled.append(task)
-            elif task.exception():
-                report.failed(src, str(task.exception()))
-            else:
-                changed = Changed.YES if task.result() else Changed.NO
-                # If the file was written back or was successfully checked as
-                # well-formatted, store this information in the cache.
-                if write_back is WriteBack.YES or (
-                    write_back is WriteBack.CHECK and changed is Changed.NO
-                ):
-                    sources_to_cache.append(src)
-                report.done(src, changed)
-    if cancelled:
-        if sys.version_info >= (3, 7):
-            await asyncio.gather(*cancelled, return_exceptions=True)
-        else:
-            await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
-    if sources_to_cache:
-        write_cache(cache, sources_to_cache, mode)
-
-
 def format_file_in_place(
     src: Path,
     fast: bool,
@@ -1495,8 +1369,14 @@
 
 
 def patched_main() -> None:
-    maybe_install_uvloop()
-    freeze_support()
+    global __BLACK_MAIN_CALLED__
+    __BLACK_MAIN_CALLED__ = True
+
+    if sys.platform == "win32" and getattr(sys, "frozen", False):
+        from multiprocessing import freeze_support
+
+        freeze_support()
+
     patch_click()
     main()
 
diff --git a/src/black/concurrency.py b/src/black/concurrency.py
index 24f67b6..fcf83ca 100644
--- a/src/black/concurrency.py
+++ b/src/black/concurrency.py
@@ -1,9 +1,26 @@
+"""
+Formatting many files at once via multiprocessing. Contains entrypoint and utilities.
+
+NOTE: this module is only imported if we need to format several files at once.
+"""
+
 import asyncio
 import logging
+import signal
 import sys
-from typing import Any, Iterable
+from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor
+from multiprocessing import Manager
+from pathlib import Path
+from typing import Any, Iterable, Optional, Set
 
+from mypy_extensions import mypyc_attr
+
+import black
+from black import DEFAULT_WORKERS, WriteBack, format_file_in_place
+from black.cache import Cache, filter_cached, read_cache, write_cache
+from black.mode import Mode
 from black.output import err
+from black.report import Changed, Report
 
 
 def maybe_install_uvloop() -> None:
@@ -11,7 +28,6 @@
 
     This is called only from command-line entry points to avoid
     interfering with the parent process if Black is used as a library.
-
     """
     try:
         import uvloop
@@ -55,3 +71,130 @@
         cf_logger = logging.getLogger("concurrent.futures")
         cf_logger.setLevel(logging.CRITICAL)
         loop.close()
+
+
+# diff-shades depends on being to monkeypatch this function to operate. I know it's
+# not ideal, but this shouldn't cause any issues ... hopefully. ~ichard26
+@mypyc_attr(patchable=True)
+def reformat_many(
+    sources: Set[Path],
+    fast: bool,
+    write_back: WriteBack,
+    mode: Mode,
+    report: Report,
+    workers: Optional[int],
+) -> None:
+    """Reformat multiple files using a ProcessPoolExecutor."""
+    if black.__BLACK_MAIN_CALLED__:
+        maybe_install_uvloop()
+
+    executor: Executor
+    worker_count = workers if workers is not None else DEFAULT_WORKERS
+    if sys.platform == "win32":
+        # Work around https://bugs.python.org/issue26903
+        assert worker_count is not None
+        worker_count = min(worker_count, 60)
+    try:
+        executor = ProcessPoolExecutor(max_workers=worker_count)
+    except (ImportError, NotImplementedError, OSError):
+        # we arrive here if the underlying system does not support multi-processing
+        # like in AWS Lambda or Termux, in which case we gracefully fallback to
+        # a ThreadPoolExecutor with just a single worker (more workers would not do us
+        # any good due to the Global Interpreter Lock)
+        executor = ThreadPoolExecutor(max_workers=1)
+
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    try:
+        loop.run_until_complete(
+            schedule_formatting(
+                sources=sources,
+                fast=fast,
+                write_back=write_back,
+                mode=mode,
+                report=report,
+                loop=loop,
+                executor=executor,
+            )
+        )
+    finally:
+        try:
+            shutdown(loop)
+        finally:
+            asyncio.set_event_loop(None)
+        if executor is not None:
+            executor.shutdown()
+
+
+async def schedule_formatting(
+    sources: Set[Path],
+    fast: bool,
+    write_back: WriteBack,
+    mode: Mode,
+    report: "Report",
+    loop: asyncio.AbstractEventLoop,
+    executor: "Executor",
+) -> None:
+    """Run formatting of `sources` in parallel using the provided `executor`.
+
+    (Use ProcessPoolExecutors for actual parallelism.)
+
+    `write_back`, `fast`, and `mode` options are passed to
+    :func:`format_file_in_place`.
+    """
+    cache: Cache = {}
+    if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
+        cache = read_cache(mode)
+        sources, cached = filter_cached(cache, sources)
+        for src in sorted(cached):
+            report.done(src, Changed.CACHED)
+    if not sources:
+        return
+
+    cancelled = []
+    sources_to_cache = []
+    lock = None
+    if write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
+        # For diff output, we need locks to ensure we don't interleave output
+        # from different processes.
+        manager = Manager()
+        lock = manager.Lock()
+    tasks = {
+        asyncio.ensure_future(
+            loop.run_in_executor(
+                executor, format_file_in_place, src, fast, mode, write_back, lock
+            )
+        ): src
+        for src in sorted(sources)
+    }
+    pending = tasks.keys()
+    try:
+        loop.add_signal_handler(signal.SIGINT, cancel, pending)
+        loop.add_signal_handler(signal.SIGTERM, cancel, pending)
+    except NotImplementedError:
+        # There are no good alternatives for these on Windows.
+        pass
+    while pending:
+        done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
+        for task in done:
+            src = tasks.pop(task)
+            if task.cancelled():
+                cancelled.append(task)
+            elif task.exception():
+                report.failed(src, str(task.exception()))
+            else:
+                changed = Changed.YES if task.result() else Changed.NO
+                # If the file was written back or was successfully checked as
+                # well-formatted, store this information in the cache.
+                if write_back is WriteBack.YES or (
+                    write_back is WriteBack.CHECK and changed is Changed.NO
+                ):
+                    sources_to_cache.append(src)
+                report.done(src, changed)
+    if cancelled:
+        if sys.version_info >= (3, 7):
+            await asyncio.gather(*cancelled, return_exceptions=True)
+        else:
+            await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
+    if sources_to_cache:
+        write_cache(cache, sources_to_cache, mode)
diff --git a/tests/test_black.py b/tests/test_black.py
index bb7784d..7622019 100644
--- a/tests/test_black.py
+++ b/tests/test_black.py
@@ -1737,7 +1737,9 @@
                 src = (workspace / f"test{tag}.py").resolve()
                 with src.open("w") as fobj:
                     fobj.write("print('hello')")
-            with patch("black.Manager", wraps=multiprocessing.Manager) as mgr:
+            with patch(
+                "black.concurrency.Manager", wraps=multiprocessing.Manager
+            ) as mgr:
                 cmd = ["--diff", str(workspace)]
                 if color:
                     cmd.append("--color")
@@ -1784,7 +1786,7 @@
                 str(cached): black.get_cache_info(cached),
                 str(cached_but_changed): (0.0, 0),
             }
-            todo, done = black.filter_cached(
+            todo, done = black.cache.filter_cached(
                 cache, {uncached, cached, cached_but_changed}
             )
             assert todo == {uncached, cached_but_changed}