blob: dec3a16cc9c775ac2a2cb71f3c21f2b106b4d515 [file] [log] [blame]
# parse.py is a long-lived program that communicates over STDIN and STDOUT.
# STDIN receives parse requests, one per line. For each request, it outputs the
# parsed modules and comments from all the files in that request.
import ast
import concurrent.futures
import json
import os
import sys
from io import BytesIO
from tokenize import COMMENT, tokenize
def parse_import_statements(content, filepath):
    """Collect the modules imported by a piece of Python source.

    Args:
        content: The Python source text to analyse.
        filepath: Path stored in every returned entry; it is also reported as
            the file name when ``content`` fails to parse.

    Returns:
        A list of dicts with the keys ``"name"``, ``"lineno"`` and
        ``"filepath"``. ``import a, b`` yields one entry per imported name,
        ``from a import b`` yields a single entry named ``a``, and relative
        imports (``from . import x``) are skipped.

    Raises:
        SyntaxError: If ``content`` is not valid Python source.
    """
    modules = []
    # Passing the file name makes a SyntaxError point at the offending file
    # instead of the default "<unknown>".
    tree = ast.parse(content, filename=filepath)
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            modules.extend(
                {"name": alias.name, "lineno": node.lineno, "filepath": filepath}
                for alias in node.names
            )
        elif isinstance(node, ast.ImportFrom) and node.level == 0:
            # level == 0 means an absolute import; relative ones are ignored.
            modules.append(
                {"name": node.module, "lineno": node.lineno, "filepath": filepath}
            )
    return modules
def parse_comments(content):
    """Return the text of every comment token found in ``content``.

    Args:
        content: Python source text.

    Returns:
        A list of comment strings (each including its leading ``#``), in the
        order the tokenizer produces them.
    """
    # tokenize() consumes bytes via a readline callable, so feed it the
    # UTF-8 encoded source through an in-memory buffer.
    readline = BytesIO(content.encode("utf-8")).readline
    return [token.string for token in tokenize(readline) if token.type == COMMENT]
def parse(repo_root, rel_package_path, filename):
    """Parse one source file into its imported modules and its comments.

    Args:
        repo_root: Directory that ``rel_package_path`` is joined onto.
        rel_package_path: Package directory, relative to ``repo_root``.
        filename: Name of the file inside the package directory.

    Returns:
        A dict with two keys: ``"modules"`` holds the result of
        ``parse_import_statements`` (called with the path relative to
        ``repo_root``) and ``"comments"`` holds the result of
        ``parse_comments``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    rel_filepath = os.path.join(rel_package_path, filename)
    abs_filepath = os.path.join(repo_root, rel_filepath)
    # Decode explicitly as UTF-8 (the default encoding of Python source)
    # rather than relying on the locale's preferred encoding.
    with open(abs_filepath, "r", encoding="utf-8") as file:
        content = file.read()
    # From simple benchmarks, 2 workers gave the best performance here.
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        modules_future = executor.submit(parse_import_statements, content, rel_filepath)
        comments_future = executor.submit(parse_comments, content)
    # Leaving the executor block waits for both tasks; result() re-raises any
    # exception a task hit.
    return {
        "modules": modules_future.result(),
        "comments": comments_future.result(),
    }
def main(stdin, stdout):
    """Serve parse requests until ``stdin`` is exhausted.

    Every line read from ``stdin`` must be a JSON object with the keys
    ``"repo_root"``, ``"rel_package_path"`` and ``"filenames"``. For each
    request, a JSON array holding one ``parse`` result per non-empty file name
    is written to ``stdout``, followed by a single NUL byte, and the stream is
    flushed.

    Args:
        stdin: Iterable yielding one request line at a time.
        stdout: Text stream that exposes its binary stream as ``.buffer``.

    Raises:
        json.JSONDecodeError: If a request line is not valid JSON.
        KeyError: If a request lacks one of the required keys.
    """
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for line in stdin:
            request = json.loads(line)
            repo_root = request["repo_root"]
            rel_package_path = request["rel_package_path"]
            # Drop empty names up front so the single-file fast path applies
            # the same filter as the pooled path.
            filenames = [name for name in request["filenames"] if name != ""]
            if len(filenames) == 1:
                # A lone file is parsed in this process (presumably to avoid
                # the process-pool round trip).
                outputs = [parse(repo_root, rel_package_path, filenames[0])]
            else:
                futures = [
                    executor.submit(parse, repo_root, rel_package_path, filename)
                    for filename in filenames
                ]
                # Collect in submission order so the output is deterministic.
                outputs = [future.result() for future in futures]
            print(json.dumps(outputs), end="", file=stdout, flush=True)
            # A NUL byte terminates each response.
            stdout.buffer.write(bytes([0]))
            stdout.flush()
if __name__ == "__main__":
    # Use sys.exit rather than the bare exit() helper, which is injected by
    # the site module and is missing when Python runs with -S.
    sys.exit(main(sys.stdin, sys.stdout))