pylint/utils/pragma_parser.py - third_party/github.com/pylint-dev/pylint - Git at Google

 # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
 # For details: https://github.com/pylint-dev/pylint/blob/main/LICENSE
 # Copyright (c) https://github.com/pylint-dev/pylint/blob/main/CONTRIBUTORS.txt

 from __future__ import annotations

 import re
 from collections.abc import Generator
 from typing import NamedTuple

 # Allow stopping after the first semicolon/hash encountered,
 # so that an option can be continued with the reasons
 # why it is active or disabled.
 OPTION_RGX = r"""
     (?:^\s*\#.*|\s*|               # Comment line, or whitespaces,
        \s*\#.*(?=\#.*?\bpylint:))  # or a beginning of an inline comment
                                    # followed by "pylint:" pragma
     (\#                            # Beginning of comment
     .*?                            # Anything (as little as possible)
     \bpylint:                      # pylint word and column
     \s*                            # Any number of whitespaces
     ([^;#\n]+))                    # Anything except semicolon or hash or
                                    # newline (it is the second matched group)
                                    # and end of the first matched group
     [;#]{0,1}                      # From 0 to 1 repetition of semicolon or hash
 """
 OPTION_PO = re.compile(OPTION_RGX, re.VERBOSE)


 class PragmaRepresenter(NamedTuple):
     action: str
     messages: list[str]


 ATOMIC_KEYWORDS = frozenset(("disable-all", "skip-file"))
 MESSAGE_KEYWORDS = frozenset(
     ("disable-next", "disable-msg", "enable-msg", "disable", "enable")
 )
 # sorted is necessary because sets are unordered collections and ALL_KEYWORDS
 # string should not vary between executions
 # reverse is necessary in order to have the longest keywords first, so that, for example,
 # 'disable' string should not be matched instead of 'disable-all'
 ALL_KEYWORDS = "|".join(
     sorted(ATOMIC_KEYWORDS | MESSAGE_KEYWORDS, key=len, reverse=True)
 )


 TOKEN_SPECIFICATION = [
     ("KEYWORD", rf"\b({ALL_KEYWORDS:s})\b"),
     ("MESSAGE_STRING", r"[0-9A-Za-z\-\_]{2,}"),  # Identifiers
     ("ASSIGN", r"="),  # Assignment operator
     ("MESSAGE_NUMBER", r"[CREIWF]{1}\d*"),
 ]

 TOK_REGEX = "|".join(
     f"(?P<{token_name:s}>{token_rgx:s})"
     for token_name, token_rgx in TOKEN_SPECIFICATION
 )


 def emit_pragma_representer(action: str, messages: list[str]) -> PragmaRepresenter:
     if not messages and action in MESSAGE_KEYWORDS:
         raise InvalidPragmaError(
             "The keyword is not followed by message identifier", action
         )
     return PragmaRepresenter(action, messages)


 class PragmaParserError(Exception):
     """A class for exceptions thrown by pragma_parser module."""

     def __init__(self, message: str, token: str) -> None:
         """:args message: explain the reason why the exception has been thrown
         :args token: token concerned by the exception.
         """
         self.message = message
         self.token = token
         super().__init__(self.message)


 class UnRecognizedOptionError(PragmaParserError):
     """Thrown in case the of a valid but unrecognized option."""


 class InvalidPragmaError(PragmaParserError):
     """Thrown in case the pragma is invalid."""


 def parse_pragma(pylint_pragma: str) -> Generator[PragmaRepresenter, None, None]:
     action: str | None = None
     messages: list[str] = []
     assignment_required = False
     previous_token = ""

     for mo in re.finditer(TOK_REGEX, pylint_pragma):
         kind = mo.lastgroup
         value = mo.group()

         if kind == "ASSIGN":
             if not assignment_required:
                 if action:
                     # A keyword has been found previously but doesn't support assignment
                     raise UnRecognizedOptionError(
                         "The keyword doesn't support assignment", action
                     )
                 if previous_token:
                     # Something found previously but not a known keyword
                     raise UnRecognizedOptionError(
                         "The keyword is unknown", previous_token
                     )
                 # Nothing at all detected before this assignment
                 raise InvalidPragmaError("Missing keyword before assignment", "")
             assignment_required = False
         elif assignment_required:
             raise InvalidPragmaError(
                 "The = sign is missing after the keyword", action or ""
             )
         elif kind == "KEYWORD":
             if action:
                 yield emit_pragma_representer(action, messages)
             action = value
             messages = []
             assignment_required = action in MESSAGE_KEYWORDS
         elif kind in {"MESSAGE_STRING", "MESSAGE_NUMBER"}:
             messages.append(value)
             assignment_required = False
         else:
             raise RuntimeError("Token not recognized")

         previous_token = value

     if action:
         yield emit_pragma_representer(action, messages)
     else:
         raise UnRecognizedOptionError("The keyword is unknown", previous_token)
	# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
	# For details: https://github.com/pylint-dev/pylint/blob/main/LICENSE
	# Copyright (c) https://github.com/pylint-dev/pylint/blob/main/CONTRIBUTORS.txt

	from __future__ import annotations

	import re
	from collections.abc import Generator
	from typing import NamedTuple

	# Allow stopping after the first semicolon/hash encountered,
	# so that an option can be continued with the reasons
	# why it is active or disabled.
	OPTION_RGX = r"""
	(?:^\s\#.\|\s*\| # Comment line, or whitespaces,
	\s\#.(?=\#.*?\bpylint:)) # or a beginning of an inline comment
	# followed by "pylint:" pragma
	(\# # Beginning of comment
	.*? # Anything (as little as possible)
	\bpylint: # pylint word and column
	\s* # Any number of whitespaces
	([^;#\n]+)) # Anything except semicolon or hash or
	# newline (it is the second matched group)
	# and end of the first matched group
	[;#]{0,1} # From 0 to 1 repetition of semicolon or hash
	"""
	OPTION_PO = re.compile(OPTION_RGX, re.VERBOSE)


	class PragmaRepresenter(NamedTuple):
	action: str
	messages: list[str]


	ATOMIC_KEYWORDS = frozenset(("disable-all", "skip-file"))
	MESSAGE_KEYWORDS = frozenset(
	("disable-next", "disable-msg", "enable-msg", "disable", "enable")
	)
	# sorted is necessary because sets are unordered collections and ALL_KEYWORDS
	# string should not vary between executions
	# reverse is necessary in order to have the longest keywords first, so that, for example,
	# 'disable' string should not be matched instead of 'disable-all'
	ALL_KEYWORDS = "\|".join(
	sorted(ATOMIC_KEYWORDS \| MESSAGE_KEYWORDS, key=len, reverse=True)
	)


	TOKEN_SPECIFICATION = [
	("KEYWORD", rf"\b({ALL_KEYWORDS:s})\b"),
	("MESSAGE_STRING", r"[0-9A-Za-z\-\_]{2,}"), # Identifiers
	("ASSIGN", r"="), # Assignment operator
	("MESSAGE_NUMBER", r"[CREIWF]{1}\d*"),
	]

	TOK_REGEX = "\|".join(
	f"(?P<{token_name:s}>{token_rgx:s})"
	for token_name, token_rgx in TOKEN_SPECIFICATION
	)


	def emit_pragma_representer(action: str, messages: list[str]) -> PragmaRepresenter:
	if not messages and action in MESSAGE_KEYWORDS:
	raise InvalidPragmaError(
	"The keyword is not followed by message identifier", action
	)
	return PragmaRepresenter(action, messages)


	class PragmaParserError(Exception):
	"""A class for exceptions thrown by pragma_parser module."""

	def __init__(self, message: str, token: str) -> None:
	""":args message: explain the reason why the exception has been thrown
	:args token: token concerned by the exception.
	"""
	self.message = message
	self.token = token
	super().__init__(self.message)


	class UnRecognizedOptionError(PragmaParserError):
	"""Thrown in case the of a valid but unrecognized option."""


	class InvalidPragmaError(PragmaParserError):
	"""Thrown in case the pragma is invalid."""


	def parse_pragma(pylint_pragma: str) -> Generator[PragmaRepresenter, None, None]:
	action: str \| None = None
	messages: list[str] = []
	assignment_required = False
	previous_token = ""

	for mo in re.finditer(TOK_REGEX, pylint_pragma):
	kind = mo.lastgroup
	value = mo.group()

	if kind == "ASSIGN":
	if not assignment_required:
	if action:
	# A keyword has been found previously but doesn't support assignment
	raise UnRecognizedOptionError(
	"The keyword doesn't support assignment", action
	)
	if previous_token:
	# Something found previously but not a known keyword
	raise UnRecognizedOptionError(
	"The keyword is unknown", previous_token
	)
	# Nothing at all detected before this assignment
	raise InvalidPragmaError("Missing keyword before assignment", "")
	assignment_required = False
	elif assignment_required:
	raise InvalidPragmaError(
	"The = sign is missing after the keyword", action or ""
	)
	elif kind == "KEYWORD":
	if action:
	yield emit_pragma_representer(action, messages)
	action = value
	messages = []
	assignment_required = action in MESSAGE_KEYWORDS
	elif kind in {"MESSAGE_STRING", "MESSAGE_NUMBER"}:
	messages.append(value)
	assignment_required = False
	else:
	raise RuntimeError("Token not recognized")

	previous_token = value

	if action:
	yield emit_pragma_representer(action, messages)
	else:
	raise UnRecognizedOptionError("The keyword is unknown", previous_token)