| # @file globber.py | |
| # | |
| # Provides global functionality for use by the CodeQL plugin. | |
| # | |
| # Copyright 2019 Jaakko Kangasharju | |
| # | |
| # Apache License | |
| # Version 2.0, January 2004 | |
| # http://www.apache.org/licenses/ | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # | |
| # This file has been altered from its original form. Based on code in: | |
| # https://github.com/advanced-security/filter-sarif | |
| # | |
| # Specifically: | |
| # https://github.com/advanced-security/filter-sarif/blob/main/filter_sarif.py | |
| # | |
| # It primarily contains modifications made to integrate with the CodeQL plugin. | |
| # | |
| # SPDX-License-Identifier: Apache-2.0 | |
| ## | |
| import re | |
| _double_star_after_invalid_regex = re.compile(r'[^/\\]\*\*') | |
| _double_star_first_before_invalid_regex = re.compile('^\\*\\*[^/]') | |
| _double_star_middle_before_invalid_regex = re.compile(r'[^\\]\*\*[^/]') | |
| def _match_component(pattern_component, file_name_component): | |
| if len(pattern_component) == 0 and len(file_name_component) == 0: | |
| return True | |
| elif len(pattern_component) == 0: | |
| return False | |
| elif len(file_name_component) == 0: | |
| return pattern_component == '*' | |
| elif pattern_component[0] == '*': | |
| return (_match_component(pattern_component, file_name_component[1:]) or | |
| _match_component(pattern_component[1:], file_name_component)) | |
| elif pattern_component[0] == '?': | |
| return _match_component(pattern_component[1:], file_name_component[1:]) | |
| elif pattern_component[0] == '\\': | |
| return (len(pattern_component) >= 2 and | |
| pattern_component[1] == file_name_component[0] and | |
| _match_component( | |
| pattern_component[2:], file_name_component[1:])) | |
| elif pattern_component[0] != file_name_component[0]: | |
| return False | |
| else: | |
| return _match_component(pattern_component[1:], file_name_component[1:]) | |
| def _match_components(pattern_components, file_name_components): | |
| if len(pattern_components) == 0 and len(file_name_components) == 0: | |
| return True | |
| if len(pattern_components) == 0: | |
| return False | |
| if len(file_name_components) == 0: | |
| return len(pattern_components) == 1 and pattern_components[0] == '**' | |
| if pattern_components[0] == '**': | |
| return (_match_components(pattern_components, file_name_components[1:]) | |
| or _match_components( | |
| pattern_components[1:], file_name_components)) | |
| else: | |
| return ( | |
| _match_component( | |
| pattern_components[0], file_name_components[0]) and | |
| _match_components( | |
| pattern_components[1:], file_name_components[1:])) | |
| def match(pattern: str, file_name: str): | |
| """Match a glob pattern against a file name. | |
| Glob pattern matching is for file names, which do not need to exist as | |
| files on the file system. | |
| A file name is a sequence of directory names, possibly followed by the name | |
| of a file, with the components separated by a path separator. A glob | |
| pattern is similar, except it may contain special characters: A '?' matches | |
| any character in a name. A '*' matches any sequence of characters (possibly | |
| empty) in a name. Both of these match only within a single component, i.e., | |
| they will not match a path separator. A component in a pattern may also be | |
| a literal '**', which matches zero or more components in the complete file | |
| name. A backslash '\\' in a pattern acts as an escape character, and | |
| indicates that the following character is to be matched literally, even if | |
| it is a special character. | |
| Args: | |
| pattern (str): The pattern to match. The path separator in patterns is | |
| always '/'. | |
| file_name (str): The file name to match against. The path separator in | |
| file names is the platform separator | |
| Returns: | |
| bool: True if the pattern matches, False otherwise. | |
| """ | |
| if (_double_star_after_invalid_regex.search(pattern) is not None or | |
| _double_star_first_before_invalid_regex.search( | |
| pattern) is not None or | |
| _double_star_middle_before_invalid_regex.search(pattern) is not None): | |
| raise ValueError( | |
| '** in {} not alone between path separators'.format(pattern)) | |
| pattern = pattern.rstrip('/') | |
| file_name = file_name.rstrip('/') | |
| while '**/**' in pattern: | |
| pattern = pattern.replace('**/**', '**') | |
| pattern_components = pattern.split('/') | |
| # We split on '\' as well as '/' to support unix and windows-style paths | |
| file_name_components = re.split(r'[\\/]', file_name) | |
| return _match_components(pattern_components, file_name_components) |