Page MenuHomeSoftware Heritage

D4193.id14846.diff
No OneTemporary

D4193.id14846.diff

Index: swh/model/cli.py
===================================================================
--- swh/model/cli.py
+++ swh/model/cli.py
@@ -52,11 +52,21 @@
return swhid(CONTENT, object)
-def swhid_of_dir(path):
- from swh.model.from_disk import Directory
+def swhid_of_dir(path, exclude_patterns=None):
+ from swh.model.from_disk import (
+ Directory,
+ accept_all_directories,
+ ignore_directories_patterns,
+ )
from swh.model.identifiers import DIRECTORY, swhid
- object = Directory.from_disk(path=path).get_data()
+ dir_filter = (
+ ignore_directories_patterns(path.decode(), exclude_patterns)
+ if exclude_patterns
+ else accept_all_directories
+ )
+
+ object = Directory.from_disk(path=path, dir_filter=dir_filter).get_data()
return swhid(DIRECTORY, object)
@@ -96,7 +106,7 @@
return str(SWHID(object_type="snapshot", object_id=snapshot_identifier(snapshot)))
-def identify_object(obj_type, follow_symlinks, obj):
+def identify_object(obj_type, follow_symlinks, exclude_patterns, obj):
from urllib.parse import urlparse
if obj_type == "auto":
@@ -125,7 +135,7 @@
if obj_type == "content":
swhid = swhid_of_file(path)
elif obj_type == "directory":
- swhid = swhid_of_dir(path)
+ swhid = swhid_of_dir(path, exclude_patterns)
elif obj_type == "origin":
swhid = swhid_of_origin(obj)
elif obj_type == "snapshot":
@@ -160,6 +170,15 @@
type=click.Choice(["auto", "content", "directory", "origin", "snapshot"]),
help="type of object to identify (default: auto)",
)
+@click.option(
+ "--exclude",
+ "-x",
+ "exclude_patterns",
+ metavar="PATTERN",
+ multiple=True,
+ help="Exclude directories using glob patterns \
+ (e.g., '*.git' to exclude all .git directories)",
+)
@click.option(
"--verify",
"-v",
@@ -168,7 +187,9 @@
help="reference identifier to be compared with computed one",
)
@click.argument("objects", nargs=-1, required=True)
-def identify(obj_type, verify, show_filename, follow_symlinks, objects):
+def identify(
+ obj_type, verify, show_filename, follow_symlinks, objects, exclude_patterns,
+):
"""Compute the Software Heritage persistent identifier (SWHID) for the given
source code object(s).
@@ -203,7 +224,9 @@
if verify and len(objects) != 1:
raise click.BadParameter("verification requires a single object")
- results = map(partial(identify_object, obj_type, follow_symlinks), objects)
+ results = map(
+ partial(identify_object, obj_type, follow_symlinks, exclude_patterns), objects,
+ )
if verify:
swhid = next(results)[1]
Index: swh/model/exceptions.py
===================================================================
--- swh/model/exceptions.py
+++ swh/model/exceptions.py
@@ -129,3 +129,7 @@
def __repr__(self):
return "ValidationError(%s)" % self
+
+
+class InvalidDirectoryPath(Exception):
+ pass
Index: swh/model/from_disk.py
===================================================================
--- swh/model/from_disk.py
+++ swh/model/from_disk.py
@@ -5,15 +5,20 @@
import datetime
import enum
+import fnmatch
+import glob
import os
+from pathlib import Path
+import re
import stat
-from typing import Any, Iterable, List, Optional, Tuple
+from typing import Any, Iterable, Iterator, List, Optional, Pattern, Tuple
import attr
from attrs_strict import type_validator
from typing_extensions import Final
from . import model
+from .exceptions import InvalidDirectoryPath
from .hashutil import MultiHash
from .identifiers import directory_entry_sort_key, directory_identifier
from .identifiers import identifier_to_bytes as id_to_bytes
@@ -276,6 +281,54 @@
return named_filter
+def extract_regex_objs(
+ root_path: Path, patterns: Iterable[str]
+) -> Iterator[Pattern[str]]:
+ """Generates a regex object for each pattern given in input and checks if
+ every path it matches on disk is located under the root path.
+
+ Yields:
+ a compiled regular expression object, one per pattern
+ """
+ for pattern in patterns:
+ for path in glob.glob(pattern):
+ dirpath = Path(path)
+ if root_path not in dirpath.parents:
+ error_msg = (
+ f'The path "{dirpath}" is not a subdirectory or relative '
+ f'to the root directory path: "{root_path}"'
+ )
+ raise InvalidDirectoryPath(error_msg)
+
+ regex = fnmatch.translate((pattern))
+ yield re.compile(regex)
+
+
+def ignore_directories_patterns(root_path: str, patterns: Iterable[str]):
+ """Filter for :func:`directory_to_objects` to ignore directories
+ matching certain patterns.
+
+ Args:
+ patterns (list of str): glob patterns of the directories to ignore
+ Returns:
+ a directory filter for :func:`directory_to_objects`
+ """
+ full_root_path = Path(root_path).absolute()
+ patterns = [str(full_root_path / pattern) for pattern in patterns]
+ sre_patterns = set(extract_regex_objs(full_root_path, patterns))
+
+ def pattern_filter(
+ dirpath: bytes,
+ dirname: bytes,
+ entries: Iterable[Any],
+ patterns: Iterable[Any] = sre_patterns,
+ ):
+ candidate = str(Path(dirpath.decode()).absolute())
+ return not any([pattern.match(candidate) for pattern in patterns])
+
+ return pattern_filter
+
+
def iter_directory(
directory,
) -> Tuple[List[model.Content], List[model.SkippedContent], List[model.Directory]]:

File Metadata

Mime Type
text/plain
Expires
Nov 4 2024, 8:19 PM (9 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225275

Event Timeline