#!/usr/bin/env fuchsia-vendored-python
# Copyright 2023 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Generic utilities for working with depfiles
"""
import enum
import dataclasses
import re
from pathlib import Path
from typing import AbstractSet, Callable, Iterable, Optional, Sequence
class LexError(ValueError):
    """Raised when depfile text cannot be tokenized."""


class ParseError(ValueError):
    """Raised when tokenized depfile text is not a syntactically valid depfile."""
class TokenType(enum.Enum):
    PATH = 0  # anything not matched by the other token types
COLON = 1 # ':'
SPACE = 2 # [ \t]
NEWLINE = 3 # [\n\r]
LINECONTINUE = 4 # '\\' (also start of escape-sequence)
COMMENT = 5 # '#...'
ESCAPED = 6 # e.g. '\t'
@dataclasses.dataclass
class Token(object):
text: str
type: TokenType
_SPACE_RE = re.compile(r"[ \t]+")
_NEWLINE_RE = re.compile(r"\r?\n")
_PATH_RE = re.compile(r"[a-zA-Z0-9_/.+-]+")
_COMMENT_RE = re.compile(r"#[^\n]*")
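
# For reference: an illustrative line "out.o: in.c  # cmt\n" tokenizes to
# PATH "out.o", COLON, SPACE, PATH "in.c", SPACE, COMMENT "# cmt", NEWLINE.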
def _lex_line(line: str) -> Iterable[Token]:
    prev: Optional[Token] = None
while line: # is not empty
next_char = line[0]
if prev and prev.type == TokenType.LINECONTINUE:
newline_match = _NEWLINE_RE.match(line)
if newline_match:
newline = newline_match.group(0)
yield prev
prev = None
yield Token(text=newline, type=TokenType.NEWLINE)
line = line[len(newline) :]
continue
            # otherwise, we don't handle escape sequences yet
new = Token(text=prev.text + next_char, type=TokenType.ESCAPED)
raise LexError(f"Escape sequences are not handled yet: {new}")
if next_char == ":":
yield Token(text=next_char, type=TokenType.COLON)
line = line[1:]
continue
if next_char == "\\":
prev = Token(text=next_char, type=TokenType.LINECONTINUE)
# do not yield yet, look at text that follows
line = line[1:]
continue
space_match = _SPACE_RE.match(line)
if space_match:
spaces = space_match.group(0)
yield Token(text=spaces, type=TokenType.SPACE)
line = line[len(spaces) :]
continue
newline_match = _NEWLINE_RE.match(line)
if newline_match:
newline = newline_match.group(0)
yield Token(text=newline, type=TokenType.NEWLINE)
line = line[len(newline) :]
continue
comment_match = _COMMENT_RE.match(line)
if comment_match:
comment = comment_match.group(0)
yield Token(text=comment, type=TokenType.COMMENT)
line = line[len(comment) :]
continue
path_match = _PATH_RE.match(line)
if path_match:
path = path_match.group(0)
yield Token(text=path, type=TokenType.PATH)
line = line[len(path) :]
continue
raise LexError(f'[depfile.lex] Unrecognized text: "{line}"')
if prev:
yield prev
def lex(lines: Iterable[str]) -> Iterable[Token]:
"""Divides depfile text into tokens."""
for line in lines:
yield from _lex_line(line)
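
# Illustrative sketch of the token stream lex() produces (the input lines
# are made up):
#
#   toks = list(lex(["out.o: in.c \\\n", " in.h\n"]))
#   # [PATH "out.o", COLON ":", SPACE " ", PATH "in.c", SPACE " ",
#   #  LINECONTINUE "\\", NEWLINE "\n", SPACE " ", PATH "in.h", NEWLINE "\n"]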
def _transform_paths(
tokens: Iterable[Token], transform: Callable[[str], str]
) -> Iterable[Token]:
"""Space-preserving path transformation."""
for token in tokens:
if token.type == TokenType.PATH:
yield Token(text=transform(token.text), type=TokenType.PATH)
else:
yield token
def unlex(tokens: Iterable[Token]) -> str:
"""Concatenates tokens' text."""
return "".join(token.text for token in tokens)
def transform_paths(text: str, transform: Callable[[str], str]) -> str:
"""Applies an arbitrary transformation to depfile paths."""
return unlex(
_transform_paths(lex(text.splitlines(keepends=True)), transform)
)
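
# Example sketch: rebasing every path in a depfile's text while preserving
# spacing and line-continuations (the paths are made up):
#
#   text = "obj/a.o: src/a.c \\\n    src/a.h\n"
#   transform_paths(text, lambda p: "../" + p)
#   # -> "../obj/a.o: ../src/a.c \\\n    ../src/a.h\n"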
def consume_line_continuations(toks: Iterable[Token]) -> Iterable[Token]:
"""Filter out line-continuations, as if there were long lines."""
line_cont = None
for tok in toks:
if line_cont is not None:
if tok.type == TokenType.NEWLINE:
line_cont = None
continue
if tok.type == TokenType.LINECONTINUE:
line_cont = tok
continue
yield tok
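
# Sketch: with the continuation consumed, two physical lines behave as one
# logical line (inputs are illustrative):
#
#   toks = lex(["a.o: b.c \\\n", "b.h\n"])
#   [t.text for t in consume_line_continuations(toks)]
#   # -> ["a.o", ":", " ", "b.c", " ", "b.h", "\n"]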
@dataclasses.dataclass
class Dep(object):
targets: Sequence[Token]
colon: Token
deps: Sequence[Token] = dataclasses.field(default_factory=list)
@property
def target_paths(self) -> Sequence[Path]:
return [Path(d.text) for d in self.targets]
@property
def deps_paths(self) -> Sequence[Path]:
return [Path(d.text) for d in self.deps]
@property
def is_phony(self) -> bool:
return len(self.deps) == 0
@property
def absolute_paths(self) -> AbstractSet[Path]:
return {
p for p in self.target_paths + self.deps_paths if p.is_absolute()
}
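
# Sketch of Dep's convenience properties (the tokens are hand-built here
# only for illustration; parse_lines() is the normal way to obtain Deps):
#
#   d = Dep(
#       targets=[Token("out.o", TokenType.PATH)],
#       colon=Token(":", TokenType.COLON),
#       deps=[Token("/abs/in.h", TokenType.PATH)],
#   )
#   d.target_paths    # [Path("out.o")]
#   d.is_phony        # False
#   d.absolute_paths  # {Path("/abs/in.h")}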
class _ParserState(enum.Enum):
EXPECTING_FIRST_TARGET = 0
EXPECTING_TARGET_OR_COLON = 1
EXPECTING_DEP_OR_NEWLINE = 2
DONE = 3
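
# The parser's state transitions, sketched (any other token in a given
# state is a ParseError):
#
#   EXPECTING_FIRST_TARGET    --NEWLINE--> (same; blank lines are skipped)
#   EXPECTING_FIRST_TARGET    --PATH-----> EXPECTING_TARGET_OR_COLON
#   EXPECTING_TARGET_OR_COLON --PATH-----> (same)
#   EXPECTING_TARGET_OR_COLON --COLON----> EXPECTING_DEP_OR_NEWLINE
#   EXPECTING_DEP_OR_NEWLINE  --PATH-----> (same)
#   EXPECTING_DEP_OR_NEWLINE  --NEWLINE--> DONE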
def _parse_one_dep(toks: Iterable[Token]) -> Optional[Dep]:
"""Parse a single dependency.
    Args:
        toks: token iterator, already filtered through
            consume_line_continuations(). Successive calls on the same
            iterator resume where the previous Dep ended.

    Returns:
        A Dep, or None if the token stream ended cleanly.

    Raises:
        ParseError: if there are any syntax errors.
"""
# parser states
state: _ParserState = _ParserState.EXPECTING_FIRST_TARGET
targets = []
colon = None
deps = []
for tok in toks:
if state == _ParserState.EXPECTING_FIRST_TARGET:
# ignore blank lines
if tok.type == TokenType.NEWLINE:
continue
if tok.type == TokenType.PATH:
targets.append(tok)
state = _ParserState.EXPECTING_TARGET_OR_COLON
continue
elif state == _ParserState.EXPECTING_TARGET_OR_COLON:
if tok.type == TokenType.PATH:
targets.append(tok)
continue
if tok.type == TokenType.COLON:
colon = tok
state = _ParserState.EXPECTING_DEP_OR_NEWLINE
continue
elif state == _ParserState.EXPECTING_DEP_OR_NEWLINE:
if tok.type == TokenType.PATH:
deps.append(tok)
continue
if tok.type == TokenType.NEWLINE:
state = _ParserState.DONE
break # newline terminator, done
else:
assert False, f"Internal error: unknown parser state: {state}"
# reaching this point is a parse error
raise ParseError(f"In state {state}: Unexpected token: {tok}")
if state == _ParserState.EXPECTING_FIRST_TARGET:
return None
elif state != _ParserState.DONE:
        raise ParseError(
            "Expected a terminating line-break, but reached the end of the token stream."
        )
return Dep(targets=targets, colon=colon, deps=deps)
def _parse_filtered_tokens(toks: Iterable[Token]) -> Iterable[Dep]:
    # _parse_one_dep() resumes consuming this same stream on every call,
    # so make sure it is a true iterator (a list would restart each call).
    toks = iter(toks)
    while True:
        dep = _parse_one_dep(toks)
if dep is None:
return
yield dep
def _parse_tokens(toks: Iterable[Token]) -> Iterable[Dep]:
yield from _parse_filtered_tokens(
tok
for tok in consume_line_continuations(toks)
# keep NEWLINEs as separators between deps
if tok.type not in {TokenType.SPACE, TokenType.COMMENT}
)
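
# Sketch: SPACE and COMMENT tokens are filtered out before parsing, so an
# illustrative line "a.o: b.c  # generated\n" parses to a single Dep with
# targets ["a.o"] and deps ["b.c"].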
def parse_lines(lines: Iterable[str]) -> Iterable[Dep]:
"""Parse lines of a depfile.
Args:
        lines: lines of a depfile (each including its trailing newline)
Yields:
Dep objects.
Raises:
ParseError or LexError if input is malformed.
"""
yield from _parse_tokens(lex(lines))
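
# Usage sketch for the public entry point (the depfile name below is
# hypothetical). Note that every dependency line, including the last,
# must end with a newline, or parsing raises ParseError:
#
#   with open("foo.d") as f:
#       for dep in parse_lines(f):
#           print(dep.target_paths, dep.deps_paths)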
def absolute_paths(deps: Iterable[Dep]) -> AbstractSet[Path]:
    """Returns the set of absolute paths referenced by any of the deps."""
    unique_paths = set()
for d in deps:
unique_paths.update(d.absolute_paths)
return unique_paths
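
# Sketch: gathering every absolute path mentioned anywhere in a depfile
# (the system include path is illustrative):
#
#   deps = parse_lines(["out.o: /usr/include/stdio.h src/a.c\n"])
#   absolute_paths(deps)  # {Path("/usr/include/stdio.h")}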