Changeset View
Changeset View
Standalone View
Standalone View
swh/model/from_disk.py
Show First 20 Lines • Show All 293 Lines • ▼ Show 20 Lines | |||||||||
# In the future `swh.scanner` should use the `swh.model` version and remove its own. | # In the future `swh.scanner` should use the `swh.model` version and remove its own. | ||||||||
def extract_regex_objs( | def extract_regex_objs( | ||||||||
root_path: bytes, patterns: Iterable[bytes] | root_path: bytes, patterns: Iterable[bytes] | ||||||||
) -> Iterator[Pattern[bytes]]: | ) -> Iterator[Pattern[bytes]]: | ||||||||
"""Generates a regex object for each pattern given in input and checks if | """Generates a regex object for each pattern given in input and checks if | ||||||||
the path is a subdirectory or relative to the root path. | the path is a subdirectory or relative to the root path. | ||||||||
Args: | Args: | ||||||||
root_path (bytes): path to the root directory | root_path (bytes): path to the root directory | ||||||||
patterns (list of byte): patterns to match | patterns (list of byte): shell patterns to match | ||||||||
ardumont: Those were documented when we did not have types yet.
So now it's ok to drop those (one less… | |||||||||
Yields: | Yields: | ||||||||
an SRE_Pattern object | an SRE_Pattern object | ||||||||
""" | """ | ||||||||
absolute_root_path = os.path.abspath(root_path) | absolute_root_path = os.path.abspath(root_path) | ||||||||
for pattern in patterns: | for pattern in patterns: | ||||||||
for path in glob.glob(pattern): | if os.path.isabs(pattern): | ||||||||
absolute_path = os.path.abspath(path) | pattern = os.path.relpath(pattern, root_path) | ||||||||
if not absolute_path.startswith(absolute_root_path): | # python 3.10 has a `root_dir` argument for glob, but not the previous | ||||||||
Not Done Inline Actions
ardumont: | |||||||||
# version. So we adjust the pattern | |||||||||
test_pattern = os.path.join(absolute_root_path, pattern) | |||||||||
for path in glob.glob(test_pattern): | |||||||||
if os.path.isabs(path) and not path.startswith(absolute_root_path): | |||||||||
error_msg = ( | error_msg = ( | ||||||||
b'The path "' + path + b'" is not a subdirectory or relative ' | b'The path "' + path + b'" is not a subdirectory or relative ' | ||||||||
b'to the root directory path: "' + root_path + b'"' | b'to the root directory path: "' + root_path + b'"' | ||||||||
) | ) | ||||||||
raise InvalidDirectoryPath(error_msg) | raise InvalidDirectoryPath(error_msg) | ||||||||
regex = fnmatch.translate((pattern.decode())) | regex = fnmatch.translate((pattern.decode())) | ||||||||
yield re.compile(regex.encode()) | yield re.compile(regex.encode()) | ||||||||
def ignore_directories_patterns(root_path: bytes, patterns: Iterable[bytes]): | def ignore_directories_patterns(root_path: bytes, patterns: Iterable[bytes]): | ||||||||
"""Filter for :func:`directory_to_objects` to ignore directories | """Filter for :func:`directory_to_objects` to ignore directories | ||||||||
matching certain patterns. | matching certain patterns. | ||||||||
Args: | Args: | ||||||||
root_path (bytes): path of the root directory | root_path (bytes): path of the root directory | ||||||||
patterns (list of byte): patterns to ignore | patterns (list of bytes): patterns to ignore | ||||||||
Returns: | Returns: | ||||||||
a directory filter for :func:`directory_to_objects` | a directory filter for :func:`directory_to_objects` | ||||||||
""" | """ | ||||||||
sre_patterns = set(extract_regex_objs(root_path, patterns)) | sre_patterns = set(extract_regex_objs(root_path, patterns)) | ||||||||
def pattern_filter( | def pattern_filter( | ||||||||
dirpath: bytes, | dirpath: bytes, | ||||||||
▲ Show 20 Lines • Show All 251 Lines • Show Last 20 Lines |
Those were documented when we did not have types yet.
So now it's ok to drop those (one less typo \o/).