Page MenuHomeSoftware Heritage

D5825.diff
No OneTemporary

D5825.diff

diff --git a/swh/model/cli.py b/swh/model/cli.py
--- a/swh/model/cli.py
+++ b/swh/model/cli.py
@@ -25,6 +25,7 @@
# stub so that swh-identify can be used when swh-core isn't installed
swh_cli_group = click # type: ignore
+from swh.model.from_disk import Directory
from swh.model.identifiers import CoreSWHID, ObjectType
CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
@@ -74,13 +75,8 @@
)
-def swhid_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> CoreSWHID:
- from swh.model.from_disk import (
- Directory,
- accept_all_directories,
- ignore_directories_patterns,
- )
- from swh.model.hashutil import hash_to_bytes
+def model_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> Directory:
+ from swh.model.from_disk import accept_all_directories, ignore_directories_patterns
dir_filter = (
ignore_directories_patterns(path, exclude_patterns)
@@ -88,9 +84,16 @@
else accept_all_directories
)
- object = Directory.from_disk(path=path, dir_filter=dir_filter).get_data()
+ return Directory.from_disk(path=path, dir_filter=dir_filter)
+
+
+def swhid_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> CoreSWHID:
+ from swh.model.hashutil import hash_to_bytes
+
+ obj = model_of_dir(path, exclude_patterns)
+
return CoreSWHID(
- object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(object["id"])
+ object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(obj.get_data()["id"])
)
@@ -227,9 +230,18 @@
type=CoreSWHIDParamType(),
help="reference identifier to be compared with computed one",
)
+@click.option(
+ "-r", "--recursive", is_flag=True, help="compute SWHID recursively",
+)
@click.argument("objects", nargs=-1, required=True)
def identify(
- obj_type, verify, show_filename, follow_symlinks, objects, exclude_patterns,
+ obj_type,
+ verify,
+ show_filename,
+ follow_symlinks,
+ objects,
+ exclude_patterns,
+ recursive,
):
"""Compute the Software Heritage persistent identifier (SWHID) for the given
source code object(s).
@@ -261,32 +273,62 @@
""" # NoQA # overlong lines in shell examples are fine
from functools import partial
+ import logging
if verify and len(objects) != 1:
raise click.BadParameter("verification requires a single object")
- results = zip(
- objects,
- map(
- partial(identify_object, obj_type, follow_symlinks, exclude_patterns),
+ if recursive and not os.path.isdir(objects[0]):
+ recursive = False
+ logging.warning("recursive option disabled, input is not a directory object")
+
+ if recursive:
+ if verify:
+ raise click.BadParameter(
+ "verification of recursive object identification is not supported"
+ )
+
+ if obj_type not in ("auto", "directory"):
+ raise click.BadParameter(
+ "recursive identification is supported only for directories"
+ )
+
+ root_path = os.fsencode(objects[0])
+ dir_obj = model_of_dir(root_path, exclude_patterns)
+ for sub_obj in dir_obj.iter_tree():
+ path_key = "path" if "path" in sub_obj.data else "data"
+ sub_path = os.fsdecode(sub_obj.data[path_key])
+ swhid = str(
+ CoreSWHID(
+ object_type=ObjectType[sub_obj.object_type.upper()],
+ object_id=sub_obj.hash,
+ )
+ )
+ msg = f"{swhid}\t{sub_path}" if show_filename else swhid
+ click.echo(msg)
+ else:
+ results = zip(
objects,
- ),
- )
+ map(
+ partial(identify_object, obj_type, follow_symlinks, exclude_patterns),
+ objects,
+ ),
+ )
- if verify:
- swhid = next(results)[1]
- if str(verify) == swhid:
- click.echo("SWHID match: %s" % swhid)
- sys.exit(0)
+ if verify:
+ swhid = next(results)[1]
+ if str(verify) == swhid:
+ click.echo("SWHID match: %s" % swhid)
+ sys.exit(0)
+ else:
+ click.echo("SWHID mismatch: %s != %s" % (verify, swhid))
+ sys.exit(1)
else:
- click.echo("SWHID mismatch: %s != %s" % (verify, swhid))
- sys.exit(1)
- else:
- for (obj, swhid) in results:
- msg = swhid
- if show_filename:
- msg = "%s\t%s" % (swhid, os.fsdecode(obj))
- click.echo(msg)
+ for (obj, swhid) in results:
+ msg = swhid
+ if show_filename:
+ msg = "%s\t%s" % (swhid, os.fsdecode(obj))
+ click.echo(msg)
if __name__ == "__main__":
diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -429,7 +429,7 @@
if dir_filter(path, name, dirs[path].entries):
entries[name] = dirs[path]
- dirs[root] = cls({"name": os.path.basename(root)})
+ dirs[root] = cls({"name": os.path.basename(root), "path": root})
dirs[root].update(entries)
return dirs[top_path]
diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py
--- a/swh/model/tests/swh_model_data.py
+++ b/swh/model/tests/swh_model_data.py
@@ -361,3 +361,19 @@
"snapshot": SNAPSHOTS,
"skipped_content": SKIPPED_CONTENTS,
}
+
+SAMPLE_FOLDER_SWHIDS = [
+ "swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759",
+ "swh:1:cnt:7d5c08111e21c8a9f71540939998551683375fad",
+ "swh:1:cnt:68769579c3eaadbe555379b9c3538e6628bae1eb",
+ "swh:1:cnt:e86b45e538d9b6888c969c89fbd22a85aa0e0366",
+ "swh:1:dir:3c1f578394f4623f74a0ba7fe761729f59fc6ec4",
+ "swh:1:dir:c3020f6bf135a38c6df3afeb5fb38232c5e07087",
+ "swh:1:cnt:133693b125bad2b4ac318535b84901ebb1f6b638",
+ "swh:1:dir:4b825dc642cb6eb9a060e54bf8d69288fbee4904",
+ "swh:1:cnt:19102815663d23f8b75a47e7a01965dcdc96468c",
+ "swh:1:dir:2b41c40f0d1fbffcba12497db71fba83fcca96e5",
+ "swh:1:cnt:8185dfb2c0c2c597d16f75a8a0c37668567c3d7e",
+ "swh:1:cnt:7c4c57ba9ff496ad179b8f65b1d286edbda34c9a",
+ "swh:1:cnt:acac326ddd63b0bc70840659d4ac43619484e69f",
+]
diff --git a/swh/model/tests/test_cli.py b/swh/model/tests/test_cli.py
--- a/swh/model/tests/test_cli.py
+++ b/swh/model/tests/test_cli.py
@@ -15,6 +15,7 @@
from swh.model import cli
from swh.model.hashutil import hash_to_hex
+from swh.model.tests.swh_model_data import SAMPLE_FOLDER_SWHIDS
from swh.model.tests.test_from_disk import DataMixin
@@ -177,3 +178,34 @@
)
self.assertSWHID(result, "swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759")
+
+ def test_recursive_directory(self):
+ """Check --recursive lists the SWHID of every object in sample-folder."""
+ self.make_from_tarball(self.tmpdir_name)
+ path = os.path.join(self.tmpdir_name, b"sample-folder")
+ result = self.runner.invoke(cli.identify, ["--recursive", path])
+ self.assertEqual(result.exit_code, 0, result.output)
+
+ # With filenames shown, every output line is "<SWHID>\t<path>". Keep only
+ # the SWHID column, splitting on the tab separator rather than on any
+ # whitespace so that a path containing spaces cannot skew the result.
+ output_lines = result.output.splitlines()
+ result_swhids = [line.split("\t", 1)[0] for line in output_lines]
+
+ assert len(result_swhids) == len(SAMPLE_FOLDER_SWHIDS)
+ for swhid in SAMPLE_FOLDER_SWHIDS:
+ assert swhid in result_swhids
+
+ def test_recursive_directory_no_filename(self):
+ self.make_from_tarball(self.tmpdir_name)
+ path = os.path.join(self.tmpdir_name, b"sample-folder")
+ result = self.runner.invoke(
+ cli.identify, ["--recursive", "--no-filename", path]
+ )
+ self.assertEqual(result.exit_code, 0, result.output)
+
+ result_swhids = result.output.split()
+
+ assert len(result_swhids) == len(SAMPLE_FOLDER_SWHIDS)
+ for swhid in SAMPLE_FOLDER_SWHIDS:
+ assert swhid in result_swhids

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 11:43 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217004

Event Timeline