blob: 48c1b8b90efae43167aab1eca5499e83f9dba2cc [file] [log] [blame]
#!/usr/bin/env fuchsia-vendored-python
# Copyright 2022 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Functions for working with reproxy logs.
This requires the Python protobuf bindings for the reproxy LogDump message.
"""
import hashlib
import os
import shutil
import subprocess
from api.proxy import log_pb2
from pathlib import Path
from typing import Dict
# File name of this script; used to tag every line printed by msg().
_SCRIPT_BASENAME = Path(__file__).name


def msg(text: str):
    """Print `text` to stdout, prefixed with this script's name in brackets."""
    tagged_line = f'[{_SCRIPT_BASENAME}] {text}'
    print(tagged_line)
class ReproxyLog:
    """Wraps a LogDump proto and builds lookup tables over its records.

    The output_file paths used as keys are relative to the working dir.
    """

    def __init__(self, log_dump: log_pb2.LogDump):
        self._proto: log_pb2.LogDump = log_dump

        # Build both lookup tables up front so later queries are dict reads.
        self._records_by_output_file = self._index_records_by_output_file()
        self._records_by_action_digest = self._index_records_by_action_digest()

    def _index_records_by_output_file(self) -> Dict[Path, log_pb2.LogRecord]:
        """Return a map from each output file to the record that lists it.

        If more than one record lists the same output file, the record
        that appears last in the log is the one kept.
        """
        return {
            Path(output_file): record
            for record in self.proto.records
            for output_file in record.command.output.output_files
        }

    def _index_records_by_action_digest(self) -> Dict[str, log_pb2.LogRecord]:
        """Return a map from action digest (hash/size) to its record."""
        by_digest: Dict[str, log_pb2.LogRecord] = {}
        for record in self.proto.records:
            # Not all actions have remote_metadata, due to local/racing.
            if not record.HasField('remote_metadata'):
                continue
            digest = record.remote_metadata.action_digest
            if not digest:
                continue
            # Duplicates (e.g. due to retries) overwrite earlier entries,
            # so the last record for a digest is the one kept.
            by_digest[digest] = record
        return by_digest

    @property
    def proto(self) -> log_pb2.LogDump:
        """The LogDump proto this object was constructed with."""
        return self._proto

    @property
    def records_by_output_file(self) -> Dict[Path, log_pb2.LogRecord]:
        """Lookup table: output file path -> record."""
        return self._records_by_output_file

    @property
    def records_by_action_digest(self) -> Dict[str, log_pb2.LogRecord]:
        """Lookup table: action digest -> record."""
        return self._records_by_action_digest
def setup_logdir_for_logdump(path: Path, verbose: bool = False) -> Path:
    """Automatically set up a log dir for logdump, if needed.

    The `logdump` tool expects a log directory, but sometimes we
    are only given a log file.  When we are given a log file, copy it
    to a directory.  The created directory is cached by the hash
    of the log file, so that it can be re-used for multiple queries.

    Args:
      path: path to either a .rrpl/.rpl log, or a reproxy log dir
        containing a .rrpl/.rpl log.
      verbose: if True, print the cache directory the log is copied to.

    Returns:
      The original path, or path to a newly created cached directory
      containing a copy of the log.

    Raises:
      OSError: if `path` cannot be read, or the cache directory cannot be
        created or written to.
    """
    if path.is_dir():
        # This is a directory containing a .rrpl or .rpl file.
        return path

    # Otherwise, this is assumed to be a .rrpl or .rpl file.
    # Copy it to a cached location.
    tmpdir = Path(os.environ.get('TMPDIR', '/tmp'))

    # Hash the file in fixed-size chunks instead of reading it whole:
    # the digest is identical, but memory use stays bounded for large logs.
    hasher = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            hasher.update(chunk)
    readable_hash = hasher.hexdigest()

    cached_log_dir = tmpdir / 'action_diff_py' / 'cache' / readable_hash
    if verbose:
        msg(f'Copying log to {cached_log_dir} for logdump processing.')
    cached_log_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy2(path, cached_log_dir)
    return cached_log_dir
def convert_reproxy_actions_log(
        reproxy_logdir: Path,
        reclient_bindir: Path,
        verbose: bool = False) -> log_pb2.LogDump:
    """Load the reproxy log found in `reproxy_logdir` as a LogDump proto.

    If "reproxy_log.pb" does not already exist in `reproxy_logdir`, the
    `logdump` tool from `reclient_bindir` is run first, with
    `reproxy_logdir` as both its input and its output directory.

    Args:
      reproxy_logdir: directory holding the reproxy logs; must be
        writeable when the binary log has to be generated.
      reclient_bindir: directory containing the `logdump` executable.
      verbose: if True, print progress messages.

    Returns:
      The LogDump parsed from "reproxy_log.pb".

    Raises:
      subprocess.CalledProcessError: if `logdump` exits non-zero.
    """
    log_pb_file = reproxy_logdir / "reproxy_log.pb"

    binary_log_missing = not log_pb_file.exists()
    if binary_log_missing:
        # Convert reproxy text logs to binary proto.
        logdump_tool = str(reclient_bindir / "logdump")
        # The same dir is used for input and output, so it must be writeable.
        logdir_arg = str(reproxy_logdir)
        if verbose:
            msg(f"Ingesting logs from {reproxy_logdir}.")
        # logdump could take a few minutes on large reproxy logs.
        # It is expected to leave "reproxy_log.pb" in the output dir.
        subprocess.check_call(
            [
                logdump_tool,
                "--proxy_log_dir",
                logdir_arg,
                "--output_dir",
                logdir_arg,
            ])

    if verbose:
        msg(f"Loading log records from {log_pb_file}.")

    log_dump = log_pb2.LogDump()
    log_dump.ParseFromString(log_pb_file.read_bytes())
    return log_dump