| #!/usr/bin/env python3 |
| """ |
| This file compares the output and runtime of running normal vs incremental mode |
| on the history of any arbitrary git repo as a way of performing a sanity check |
| to make sure incremental mode is working correctly and efficiently. |
| |
| It does so by first running mypy without incremental mode on the specified range |
| of commits to find the expected result, then rewinds back to the first commit and |
| re-runs mypy on the commits with incremental mode enabled to make sure it returns |
| the same results. |
| |
| This script will download and test the official mypy repo by default. Running: |
| |
| python3 misc/incremental_checker.py last 30 |
| |
| is equivalent to running |
| |
| python3 misc/incremental_checker.py last 30 \\ |
| --repo_url https://github.com/python/mypy.git \\ |
| --file-path mypy |
| |
You can choose to run this script against a specific commit id or against the
| last n commits. |
| |
| To run this script against the last 30 commits: |
| |
| python3 misc/incremental_checker.py last 30 |
| |
| To run this script starting from the commit id 2a432b: |
| |
| python3 misc/incremental_checker.py commit 2a432b |
| """ |
| |
| from typing import Any, Dict, List, Optional, Tuple |
| |
| from argparse import ArgumentParser, RawDescriptionHelpFormatter, Namespace |
| import base64 |
| import json |
| import os |
| import random |
| import re |
| import shutil |
| import subprocess |
| import sys |
| import textwrap |
| import time |
| |
| |
# Default file where expected (non-incremental) mypy outputs are cached
# between runs of this script.
CACHE_PATH = ".incremental_checker_cache.json"
# Defaults for the repo to clone and the file/package to typecheck
# (the mypy repo itself, checking the `mypy` package).
MYPY_REPO_URL = "https://github.com/python/mypy.git"
MYPY_TARGET_FILE = "mypy"
# Command prefix used to invoke the mypy daemon (dmypy).
DAEMON_CMD = ["python3", "-m", "mypy.dmypy"]

# Alias for JSON-shaped dictionaries, e.g. the on-disk results cache.
JsonDict = Dict[str, Any]
| |
| |
def print_offset(text: str, indent_length: int = 4) -> None:
    """Print `text` indented by `indent_length` spaces, framed by blank lines."""
    padding = ' ' * indent_length
    print()
    print(textwrap.indent(text, padding))
    print()
| |
| |
def delete_folder(folder_path: str) -> None:
    """Recursively remove `folder_path`; do nothing if it does not exist."""
    if not os.path.exists(folder_path):
        return
    shutil.rmtree(folder_path)
| |
| |
def execute(command: List[str], fail_on_error: bool = True) -> Tuple[str, str, int]:
    """Run `command` and return its (stdout, stderr, returncode).

    The command is executed WITHOUT a shell: the argument list is passed
    directly to the OS, so arguments containing spaces or shell
    metacharacters (e.g. a repo path with spaces) are forwarded verbatim
    instead of being re-tokenized.  (The previous implementation joined the
    list and ran it with shell=True, which broke on such arguments and was
    an injection hazard.)

    If `fail_on_error` is True and the command exits with a non-zero
    status, dump the command, return code, stdout, and stderr, then raise
    RuntimeError.
    """
    proc = subprocess.Popen(
        command,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE)
    stdout_bytes, stderr_bytes = proc.communicate()  # type: Tuple[bytes, bytes]
    stdout, stderr = stdout_bytes.decode('utf-8'), stderr_bytes.decode('utf-8')
    if fail_on_error and proc.returncode != 0:
        print('EXECUTED COMMAND:', repr(command))
        print('RETURN CODE:', proc.returncode)
        print()
        print('STDOUT:')
        print_offset(stdout)
        print('STDERR:')
        print_offset(stderr)
        raise RuntimeError('Unexpected error from external tool.')
    return stdout, stderr, proc.returncode
| |
| |
def ensure_environment_is_ready(mypy_path: str, temp_repo_path: str, mypy_cache_path: str) -> None:
    """Move into the mypy repo and wipe leftovers from any previous run."""
    os.chdir(mypy_path)
    for stale_path in (temp_repo_path, mypy_cache_path):
        delete_folder(stale_path)
| |
| |
def initialize_repo(repo_url: str, temp_repo_path: str, branch: Optional[str]) -> None:
    """Clone `repo_url` into `temp_repo_path`, checking out `branch` if given.

    `branch` is None when no --branch option was supplied on the command
    line; in that case the clone's default branch is kept.
    """
    print("Cloning repo {0} to {1}".format(repo_url, temp_repo_path))
    execute(["git", "clone", repo_url, temp_repo_path])
    if branch is not None:
        print("Checking out branch {}".format(branch))
        execute(["git", "-C", temp_repo_path, "checkout", branch])
| |
| |
def get_commits(repo_folder_path: str, commit_range: str) -> List[Tuple[str, str]]:
    """Return (commit id, commit message) pairs for `commit_range`, oldest first."""
    stdout, _stderr, _errcode = execute([
        "git", "-C", repo_folder_path, "log", "--reverse", "--oneline", commit_range])
    pairs = []  # type: List[Tuple[str, str]]
    for entry in stdout.strip().split('\n'):
        sha, _sep, subject = entry.partition(' ')
        pairs.append((sha, subject))
    return pairs
| |
| |
def get_commits_starting_at(repo_folder_path: str, start_commit: str) -> List[Tuple[str, str]]:
    """Return every commit from `start_commit` (inclusive) through HEAD."""
    print("Fetching commits starting at {0}".format(start_commit))
    commit_range = '{0}^..HEAD'.format(start_commit)
    return get_commits(repo_folder_path, commit_range)
| |
| |
def get_nth_commit(repo_folder_path: str, n: int) -> Tuple[str, str]:
    """Return (id, message) of the oldest of the last `n` commits."""
    print("Fetching last {} commits (or all, if there are fewer commits than n)".format(n))
    recent = get_commits(repo_folder_path, '-{}'.format(n))
    return recent[0]
| |
| |
def run_mypy(target_file_path: Optional[str],
             mypy_cache_path: str,
             mypy_script: Optional[str],
             *,
             incremental: bool = False,
             daemon: bool = False,
             verbose: bool = False) -> Tuple[float, str, Dict[str, Any]]:
    """Run mypy on `target_file_path` and return (runtime, output, stats).

    `output` is what mypy printed (stderr takes precedence over stdout when
    non-empty, since that indicates a crash or usage error).

    If `incremental` is True, mypy stores and retrieves caching data from
    `mypy_cache_path`.  If `verbose` is True, the "-v -v" flags are passed
    to make mypy emit debugging information.

    If `daemon` is True, daemon mode is used instead; the daemon must be
    started and stopped by the caller, and `stats` is populated from its
    "-v" output.
    """
    stats = {}  # type: Dict[str, Any]
    if daemon:
        command = DAEMON_CMD + ["check", "-v"]
    else:
        command = [mypy_script] if mypy_script is not None else ["python3", "-m", "mypy"]
        command += ["--cache-dir", mypy_cache_path]
        if incremental:
            command += ["--incremental"]
        if verbose:
            command += ["-v", "-v"]
    if target_file_path is not None:
        command.append(target_file_path)
    start = time.time()
    output, stderr, _ = execute(command, False)
    if stderr != "":
        output = stderr
    elif daemon:
        output, stats = filter_daemon_stats(output)
    runtime = time.time() - start
    return runtime, output, stats
| |
| |
def filter_daemon_stats(output: str) -> Tuple[str, Dict[str, Any]]:
    """Split daemon output into (remaining output, stats dict).

    Lines of the form "key : value" are collected into the stats dict
    (values kept as strings); every other line is kept as output.  When any
    output lines remain, a trailing blank line is appended.
    """
    stats = {}  # type: Dict[str, Any]
    kept = []  # type: List[str]
    for raw_line in output.splitlines():
        match = re.match(r'(\w+)\s+:\s+(.*)', raw_line)
        if match is None:
            kept.append(raw_line)
        else:
            stats[match.group(1)] = match.group(2)
    if kept:
        kept.append('\n')
    return '\n'.join(kept), stats
| |
| |
def start_daemon(mypy_cache_path: str) -> None:
    """(Re)start the mypy daemon, directing it at `mypy_cache_path`."""
    daemon_args = ["restart", "--log-file", "./@incr-chk-logs",
                   "--", "--cache-dir", mypy_cache_path]
    execute(DAEMON_CMD + daemon_args)
| |
| |
def stop_daemon() -> None:
    """Shut down the running mypy daemon."""
    command = DAEMON_CMD + ["stop"]
    execute(command)
| |
| |
def load_cache(incremental_cache_path: str = CACHE_PATH) -> JsonDict:
    """Return the parsed results cache, or an empty dict if none exists yet."""
    if not os.path.exists(incremental_cache_path):
        return {}
    with open(incremental_cache_path, 'r') as stream:
        return json.load(stream)
| |
| |
def save_cache(cache: JsonDict, incremental_cache_path: str = CACHE_PATH) -> None:
    """Write `cache` to `incremental_cache_path` as pretty-printed JSON."""
    serialized = json.dumps(cache, indent=2)
    with open(incremental_cache_path, 'w') as stream:
        stream.write(serialized)
| |
| |
def set_expected(commits: List[Tuple[str, str]],
                 cache: JsonDict,
                 temp_repo_path: str,
                 target_file_path: Optional[str],
                 mypy_cache_path: str,
                 mypy_script: Optional[str]) -> None:
    """Populate `cache` with the expected (non-incremental) results per commit.

    Checks out each commit inside `temp_repo_path`, runs mypy on
    `target_file_path`, and records the runtime and output in `cache`.
    Commits already present in `cache` are skipped to save time.
    """
    for commit_id, message in commits:
        if commit_id in cache:
            print('Skipping commit (already cached): {0}: "{1}"'.format(commit_id, message))
            continue
        print('Caching expected output for commit {0}: "{1}"'.format(commit_id, message))
        execute(["git", "-C", temp_repo_path, "checkout", commit_id])
        runtime, output, _ = run_mypy(target_file_path, mypy_cache_path, mypy_script,
                                      incremental=False)
        cache[commit_id] = {'runtime': runtime, 'output': output}
        if output:
            print("  Output ({:.3f} sec)".format(runtime))
            print_offset(output, 8)
        else:
            print("  Clean output ({:.3f} sec)".format(runtime))
        print()
| |
| |
def test_incremental(commits: List[Tuple[str, str]],
                     cache: JsonDict,
                     temp_repo_path: str,
                     target_file_path: Optional[str],
                     mypy_cache_path: str,
                     *,
                     mypy_script: Optional[str] = None,
                     daemon: bool = False,
                     exit_on_error: bool = False) -> None:
    """Run incremental mode over `commits`, comparing against cached results.

    Checks out each commit inside `temp_repo_path` and runs mypy on
    `target_file_path` with incremental mode (or the daemon).  The expected
    output for each commit must already be stored in `cache`.  The first
    commit is evaluated twice so the second pass starts from a warm cache.
    """
    print("Note: first commit is evaluated twice to warm up cache")
    commits = [commits[0]] + commits
    overall_stats = {}  # type: Dict[str, float]
    for commit_id, message in commits:
        print('Now testing commit {0}: "{1}"'.format(commit_id, message))
        execute(["git", "-C", temp_repo_path, "checkout", commit_id])
        runtime, output, stats = run_mypy(target_file_path, mypy_cache_path, mypy_script,
                                          incremental=True, daemon=daemon)
        relevant_stats = combine_stats(overall_stats, stats)
        entry = cache[commit_id]
        expected_runtime = entry['runtime']  # type: float
        expected_output = entry['output']  # type: str
        if output == expected_output:
            print("  Output matches expected result!")
            print("  Incremental: {:.3f} sec".format(runtime))
            print("  Original: {:.3f} sec".format(expected_runtime))
            if relevant_stats:
                print("  Stats: {}".format(relevant_stats))
        else:
            print("  Output does not match expected result!")
            print("  Expected output ({:.3f} sec):".format(expected_runtime))
            print_offset(expected_output, 8)
            print("  Actual output: ({:.3f} sec):".format(runtime))
            print_offset(output, 8)
            if exit_on_error:
                break
    if overall_stats:
        print("Overall stats:", overall_stats)
| |
| |
def combine_stats(overall_stats: Dict[str, float],
                  new_stats: Dict[str, Any]) -> Dict[str, float]:
    """Fold the interesting entries of `new_stats` into `overall_stats`.

    Only 'build_time' and 'gc_time' are considered (values are coerced to
    float).  They are summed into `overall_stats` in place, and the entries
    found in `new_stats` are returned as a new dict.
    """
    interesting_keys = ('build_time', 'gc_time')
    relevant_stats = {}  # type: Dict[str, float]
    for key in interesting_keys:
        if key not in new_stats:
            continue
        value = float(new_stats[key])
        relevant_stats[key] = value
        overall_stats[key] = overall_stats.get(key, 0.0) + value
    return relevant_stats
| |
| |
def cleanup(temp_repo_path: str, mypy_cache_path: str) -> None:
    """Delete the cloned repo and the mypy cache directory."""
    for path in (temp_repo_path, mypy_cache_path):
        delete_folder(path)
| |
| |
def test_repo(target_repo_url: str, temp_repo_path: str,
              target_file_path: Optional[str],
              mypy_path: str, incremental_cache_path: str, mypy_cache_path: str,
              range_type: str, range_start: str, branch: str,
              params: Namespace) -> None:
    """Test incremental mode against the repo at `target_repo_url`.

    The run proceeds in five stages:

    1. Clone `target_repo_url` into `temp_repo_path`, checking out `branch`
       when one was specified.
    2. Walk the repo's history to collect the commits to test, honoring the
       --limit and --sample options.
    3. Run mypy WITHOUT incremental mode on `target_file_path` (assumed to
       live inside `temp_repo_path`) at each commit to establish the
       expected results.  Commits whose results are already stored in
       `incremental_cache_path` are skipped; new results are cached after.
    4. Rewind to the first commit and re-run mypy WITH incremental mode
       (or the daemon) commit by commit, comparing against stage 3.
    5. Remove temporary files and stop the daemon, if applicable.
    """
    # Stage 1: fresh clone in a clean environment.
    ensure_environment_is_ready(mypy_path, temp_repo_path, mypy_cache_path)
    initialize_repo(target_repo_url, temp_repo_path, branch)

    # Stage 2: decide which commits to test.
    if range_type == "commit":
        start_commit = range_start
    elif range_type == "last":
        start_commit = get_nth_commit(temp_repo_path, int(range_start))[0]
    else:
        raise RuntimeError("Invalid option: {}".format(range_type))
    commits = get_commits_starting_at(temp_repo_path, start_commit)
    if params.limit:
        commits = commits[:params.limit]
    if params.sample:
        # A fresh random seed is generated (and printed) unless one was given,
        # so a sampled run can be reproduced later.
        seed = params.seed or base64.urlsafe_b64encode(os.urandom(15)).decode('ascii')
        random.seed(seed)
        commits = random.sample(commits, params.sample)
        print("Sampled down to %d commits using random seed %s" % (len(commits), seed))

    # Stage 3: compute (or load) the expected non-incremental results.
    cache = load_cache(incremental_cache_path)
    set_expected(commits, cache, temp_repo_path, target_file_path, mypy_cache_path,
                 mypy_script=params.mypy_script)
    save_cache(cache, incremental_cache_path)

    # Stage 4: replay the commits with incremental mode (or the daemon).
    if params.daemon:
        print('Starting daemon')
        start_daemon(mypy_cache_path)
    test_incremental(commits, cache, temp_repo_path, target_file_path, mypy_cache_path,
                     mypy_script=params.mypy_script, daemon=params.daemon,
                     exit_on_error=params.exit_on_error)

    # Stage 5: tidy up.
    if not params.keep_temporary_files:
        cleanup(temp_repo_path, mypy_cache_path)
    if params.daemon:
        print('Stopping daemon')
        stop_daemon()
| |
| |
def main() -> None:
    """Parse command-line arguments and kick off the incremental test run."""
    help_factory = (lambda prog: RawDescriptionHelpFormatter(prog=prog, max_help_position=32))  # type: Any
    parser = ArgumentParser(
        prog='incremental_checker',
        description=__doc__,
        formatter_class=help_factory)

    parser.add_argument("range_type", metavar="START_TYPE", choices=["last", "commit"],
                        help="must be one of 'last' or 'commit'")
    parser.add_argument("range_start", metavar="COMMIT_ID_OR_NUMBER",
                        help="the commit id to start from, or the number of "
                        "commits to move back (see above)")
    parser.add_argument("-r", "--repo_url", default=MYPY_REPO_URL, metavar="URL",
                        help="the repo to clone and run tests on")
    parser.add_argument("-f", "--file-path", default=MYPY_TARGET_FILE, metavar="FILE",
                        help="the name of the file or directory to typecheck")
    parser.add_argument("-x", "--exit-on-error", action='store_true',
                        help="Exits as soon as an error occurs")
    parser.add_argument("--keep-temporary-files", action='store_true',
                        help="Keep temporary files on exit")
    parser.add_argument("--cache-path", default=CACHE_PATH, metavar="DIR",
                        help="sets a custom location to store cache data")
    # Note: the two string literals below are concatenated; the "; " keeps
    # the rendered help text from running the words together.
    parser.add_argument("--branch", default=None, metavar="NAME",
                        help="check out and test a custom branch; "
                        "uses the default if not specified")
    parser.add_argument("--sample", type=int, help="use a random sample of size SAMPLE")
    parser.add_argument("--seed", type=str, help="random seed")
    parser.add_argument("--limit", type=int,
                        help="maximum number of commits to use (default until end)")
    parser.add_argument("--mypy-script", type=str, help="alternate mypy script to run")
    parser.add_argument("--daemon", action='store_true',
                        help="use mypy daemon instead of incremental (highly experimental)")

    # With no arguments at all, show the help text instead of an error.
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    params = parser.parse_args(sys.argv[1:])

    # Make all paths absolute so we avoid having to worry about being in the right folder

    # The path to this specific script (incremental_checker.py).
    script_path = os.path.abspath(sys.argv[0])

    # The path to the mypy repo.
    mypy_path = os.path.abspath(os.path.dirname(os.path.dirname(script_path)))

    # The folder the cloned repo will reside in.
    temp_repo_path = os.path.abspath(os.path.join(mypy_path, "tmp_repo"))

    # The particular file or package to typecheck inside the repo.
    if params.file_path:
        target_file_path = os.path.abspath(os.path.join(temp_repo_path, params.file_path))
    else:
        # Allow `-f ''` to clear target_file_path.
        target_file_path = None

    # The path to where the incremental checker cache data is stored.
    incremental_cache_path = os.path.abspath(params.cache_path)

    # The path to store the mypy incremental mode cache data
    mypy_cache_path = os.path.abspath(os.path.join(mypy_path, "misc", ".mypy_cache"))

    print("Assuming mypy is located at {0}".format(mypy_path))
    print("Temp repo will be cloned at {0}".format(temp_repo_path))
    print("Testing file/dir located at {0}".format(target_file_path))
    print("Using cache data located at {0}".format(incremental_cache_path))
    print()

    test_repo(params.repo_url, temp_repo_path, target_file_path,
              mypy_path, incremental_cache_path, mypy_cache_path,
              params.range_type, params.range_start, params.branch,
              params)
| |
| |
# Run only when invoked as a script, not when imported as a module.
if __name__ == '__main__':
    main()