Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066160
D4193.id14846.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D4193.id14846.diff
View Options
Index: swh/model/cli.py
===================================================================
--- swh/model/cli.py
+++ swh/model/cli.py
@@ -52,11 +52,21 @@
return swhid(CONTENT, object)
-def swhid_of_dir(path):
- from swh.model.from_disk import Directory
+def swhid_of_dir(path, exclude_patterns=None):
+ from swh.model.from_disk import (
+ Directory,
+ accept_all_directories,
+ ignore_directories_patterns,
+ )
from swh.model.identifiers import DIRECTORY, swhid
- object = Directory.from_disk(path=path).get_data()
+ dir_filter = (
+ ignore_directories_patterns(path.decode(), exclude_patterns)
+ if exclude_patterns
+ else accept_all_directories
+ )
+
+ object = Directory.from_disk(path=path, dir_filter=dir_filter).get_data()
return swhid(DIRECTORY, object)
@@ -96,7 +106,7 @@
return str(SWHID(object_type="snapshot", object_id=snapshot_identifier(snapshot)))
-def identify_object(obj_type, follow_symlinks, obj):
+def identify_object(obj_type, follow_symlinks, exclude_patterns, obj):
from urllib.parse import urlparse
if obj_type == "auto":
@@ -125,7 +135,7 @@
if obj_type == "content":
swhid = swhid_of_file(path)
elif obj_type == "directory":
- swhid = swhid_of_dir(path)
+ swhid = swhid_of_dir(path, exclude_patterns)
elif obj_type == "origin":
swhid = swhid_of_origin(obj)
elif obj_type == "snapshot":
@@ -160,6 +170,15 @@
type=click.Choice(["auto", "content", "directory", "origin", "snapshot"]),
help="type of object to identify (default: auto)",
)
+@click.option(
+ "--exclude",
+ "-x",
+ "exclude_patterns",
+ metavar="PATTERN",
+ multiple=True,
+ help="Exclude directories using glob patterns \
+ (e.g., '*.git' to exclude all .git directories)",
+)
@click.option(
"--verify",
"-v",
@@ -168,7 +187,9 @@
help="reference identifier to be compared with computed one",
)
@click.argument("objects", nargs=-1, required=True)
-def identify(obj_type, verify, show_filename, follow_symlinks, objects):
+def identify(
+ obj_type, verify, show_filename, follow_symlinks, objects, exclude_patterns,
+):
"""Compute the Software Heritage persistent identifier (SWHID) for the given
source code object(s).
@@ -203,7 +224,9 @@
if verify and len(objects) != 1:
raise click.BadParameter("verification requires a single object")
- results = map(partial(identify_object, obj_type, follow_symlinks), objects)
+ results = map(
+ partial(identify_object, obj_type, follow_symlinks, exclude_patterns), objects,
+ )
if verify:
swhid = next(results)[1]
Index: swh/model/exceptions.py
===================================================================
--- swh/model/exceptions.py
+++ swh/model/exceptions.py
@@ -129,3 +129,7 @@
def __repr__(self):
return "ValidationError(%s)" % self
+
+
+class InvalidDirectoryPath(Exception):
+ pass
Index: swh/model/from_disk.py
===================================================================
--- swh/model/from_disk.py
+++ swh/model/from_disk.py
@@ -5,15 +5,20 @@
import datetime
import enum
+import fnmatch
+import glob
import os
+from pathlib import Path
+import re
import stat
-from typing import Any, Iterable, List, Optional, Tuple
+from typing import Any, Iterable, Iterator, List, Optional, Pattern, Tuple
import attr
from attrs_strict import type_validator
from typing_extensions import Final
from . import model
+from .exceptions import InvalidDirectoryPath
from .hashutil import MultiHash
from .identifiers import directory_entry_sort_key, directory_identifier
from .identifiers import identifier_to_bytes as id_to_bytes
@@ -276,6 +281,54 @@
return named_filter
+def extract_regex_objs(
+ root_path: Path, patterns: Iterable[str]
+) -> Iterator[Pattern[str]]:
+ """Generates a regex object for each pattern given in input and checks if
+ the path is a subdirectory or relative to the root path.
+
+ Yields:
+ an SRE_Pattern object
+ """
+ for pattern in patterns:
+ for path in glob.glob(pattern):
+ dirpath = Path(path)
+ if root_path not in dirpath.parents:
+ error_msg = (
+ f'The path "{dirpath}" is not a subdirectory or relative '
+ f'to the root directory path: "{root_path}"'
+ )
+ raise InvalidDirectoryPath(error_msg)
+
+ regex = fnmatch.translate((pattern))
+ yield re.compile(regex)
+
+
+def ignore_directories_patterns(root_path: str, patterns: Iterable[str]):
+ """Filter for :func:`directory_to_objects` to ignore directories
+ matching certain patterns.
+
+ Args:
+ patterns (list of str): pattern to ignore
+ Returns:
+ a directory filter for :func:`directory_to_objects`
+ """
+ full_root_path = Path(root_path).absolute()
+ patterns = [str(full_root_path / pattern) for pattern in patterns]
+ sre_patterns = set(extract_regex_objs(full_root_path, patterns))
+
+ def pattern_filter(
+ dirpath: bytes,
+ dirname: bytes,
+ entries: Iterable[Any],
+ patterns: Iterable[Any] = sre_patterns,
+ ):
+ candidate = str(Path(dirpath.decode()).absolute())
+ return not any([pattern.match(candidate) for pattern in patterns])
+
+ return pattern_filter
+
+
def iter_directory(
directory,
) -> Tuple[List[model.Content], List[model.SkippedContent], List[model.Directory]]:
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 4 2024, 8:19 PM (9 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225275
Attached To
D4193: swh identify: add --exclude
Event Timeline
Log In to Comment