Changeset View
Changeset View
Standalone View
Standalone View
swh/model/from_disk.py
# Copyright (C) 2017-2020 The Software Heritage developers | # Copyright (C) 2017-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import enum | import enum | ||||
import fnmatch | |||||
import glob | |||||
import os | import os | ||||
from pathlib import Path | |||||
import re | |||||
import stat | import stat | ||||
from typing import Any, Iterable, List, Optional, Tuple | from typing import Any, Iterable, Iterator, List, Optional, Pattern, Tuple | ||||
import attr | import attr | ||||
from attrs_strict import type_validator | from attrs_strict import type_validator | ||||
from typing_extensions import Final | from typing_extensions import Final | ||||
from . import model | from . import model | ||||
from .exceptions import InvalidDirectoryPath | |||||
from .hashutil import MultiHash | from .hashutil import MultiHash | ||||
from .identifiers import directory_entry_sort_key, directory_identifier | from .identifiers import directory_entry_sort_key, directory_identifier | ||||
from .identifiers import identifier_to_bytes as id_to_bytes | from .identifiers import identifier_to_bytes as id_to_bytes | ||||
from .identifiers import identifier_to_str as id_to_str | from .identifiers import identifier_to_str as id_to_str | ||||
from .merkle import MerkleLeaf, MerkleNode | from .merkle import MerkleLeaf, MerkleNode | ||||
@attr.s | @attr.s | ||||
▲ Show 20 Lines • Show All 246 Lines • ▼ Show 20 Lines | ): | ||||
if case_sensitive: | if case_sensitive: | ||||
return dirname not in names | return dirname not in names | ||||
else: | else: | ||||
return dirname.lower() not in names | return dirname.lower() not in names | ||||
return named_filter | return named_filter | ||||
# TODO: `extract_regex_objs` has been copied from `swh.scanner`. In the future | |||||
# `swh.scanner` should use the `swh.model` version and remove its own. | |||||
def extract_regex_objs(
    root_path: Path, patterns: Iterable[str]
) -> Iterator[Pattern[str]]:
    """Generate a compiled regex for each pattern that matches paths located
    under the root path.

    Each pattern is expanded with :func:`glob.glob` and every path it matches
    is checked to have ``root_path`` among its parents. The check is done on
    the paths as given (no symlink resolution or normalisation is performed
    here). A pattern that matches no existing path yields nothing.

    Args:
        root_path: root directory that every matched path must be located
            under
        patterns: shell-style (glob) patterns to convert to regex objects

    Yields:
        an SRE_Pattern object, built from the :func:`fnmatch.translate` form
        of the pattern, once for each pattern that matched at least one path

    Raises:
        InvalidDirectoryPath: if a path matched by a pattern does not have
            ``root_path`` among its parents

    """
    for pattern in patterns:
        matched = False
        for path in glob.glob(pattern):
            dirpath = Path(path)
            if root_path not in dirpath.parents:
                error_msg = (
                    f'The path "{dirpath}" is not a subdirectory or relative '
                    f'to the root directory path: "{root_path}"'
                )
                raise InvalidDirectoryPath(error_msg)
            matched = True

        # The regex only depends on the pattern, not on the matched paths:
        # yield it once instead of once per glob match.
        if matched:
            yield re.compile(fnmatch.translate(pattern))
def ignore_directories_patterns(root_path: str, patterns: Iterable[str]):
    """Filter for :func:`directory_to_objects` to ignore directories
    matching certain patterns.

    Args:
        root_path (str): path of the root directory; the directory paths
            handed to the filter are made relative to it before being
            matched against the patterns
        patterns (list of str): patterns to ignore

    Returns:
        a directory filter for :func:`directory_to_objects`

    """
    root = Path(root_path)
    # The patterns are compiled once, here, and not on every filter call.
    sre_patterns = set(extract_regex_objs(root, patterns))

    def pattern_filter(
        dirpath: bytes,
        dirname: bytes,
        entries: Iterable[Any],
        patterns: Iterable[Any] = sre_patterns,
        absolute_root_path: Path = root.absolute(),
    ):
        """Return False when ``dirpath``, taken relative to the root
        directory, matches one of the patterns, True otherwise.
        """
        # os.fsdecode mirrors how the OS layer produced the bytes path, so
        # directory names that are not valid UTF-8 do not raise
        # UnicodeDecodeError.
        full_path = Path(os.fsdecode(dirpath)).absolute()
        # Path.relative_to raises ValueError when dirpath is not located
        # under the root directory.
        relative_path = full_path.relative_to(absolute_root_path)
        candidate = str(relative_path)
        # Generator (not a list) so that any() stops at the first match.
        return not any(pattern.match(candidate) for pattern in patterns)

    return pattern_filter
def iter_directory( | def iter_directory( | ||||
directory, | directory, | ||||
) -> Tuple[List[model.Content], List[model.SkippedContent], List[model.Directory]]: | ) -> Tuple[List[model.Content], List[model.SkippedContent], List[model.Directory]]: | ||||
"""Return the directory listing from a disk-memory directory instance. | """Return the directory listing from a disk-memory directory instance. | ||||
Raises: | Raises: | ||||
TypeError in case an unexpected object type is listed. | TypeError in case an unexpected object type is listed. | ||||
▲ Show 20 Lines • Show All 193 Lines • Show Last 20 Lines |
please document the arguments