Add a -r/--recursive flag to `swh identify`.

With the flag set and a directory as first argument, the command prints the
SWHID of that directory and of every object yielded by Directory.iter_tree().
Directory.from_disk() now records each directory's on-disk "path" so the CLI
can print it next to the SWHID.

diff --git a/swh/model/cli.py b/swh/model/cli.py
--- a/swh/model/cli.py
+++ b/swh/model/cli.py
@@ -25,6 +25,7 @@
     # stub so that swh-identify can be used when swh-core isn't installed
     swh_cli_group = click  # type: ignore
 
+from swh.model.from_disk import Directory
 from swh.model.identifiers import CoreSWHID, ObjectType
 
 CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
@@ -74,13 +75,8 @@
     )
 
 
-def swhid_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> CoreSWHID:
-    from swh.model.from_disk import (
-        Directory,
-        accept_all_directories,
-        ignore_directories_patterns,
-    )
-    from swh.model.hashutil import hash_to_bytes
+def model_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> Directory:
+    from swh.model.from_disk import accept_all_directories, ignore_directories_patterns
 
     dir_filter = (
         ignore_directories_patterns(path, exclude_patterns)
@@ -88,9 +84,16 @@
         else accept_all_directories
     )
 
-    object = Directory.from_disk(path=path, dir_filter=dir_filter).get_data()
+    return Directory.from_disk(path=path, dir_filter=dir_filter)
+
+
+def swhid_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> CoreSWHID:
+    from swh.model.hashutil import hash_to_bytes
+
+    obj = model_of_dir(path, exclude_patterns)
+
     return CoreSWHID(
-        object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(object["id"])
+        object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(obj.get_data()["id"])
     )
 
 
@@ -227,9 +230,18 @@
     type=CoreSWHIDParamType(),
     help="reference identifier to be compared with computed one",
 )
+@click.option(
+    "-r", "--recursive", is_flag=True, help="compute SWHID recursively",
+)
 @click.argument("objects", nargs=-1, required=True)
 def identify(
-    obj_type, verify, show_filename, follow_symlinks, objects, exclude_patterns,
+    obj_type,
+    verify,
+    show_filename,
+    follow_symlinks,
+    objects,
+    exclude_patterns,
+    recursive,
 ):
     """Compute the Software Heritage persistent identifier (SWHID) for the given
     source code object(s).
@@ -261,32 +273,62 @@
 
     """  # NoQA  # overlong lines in shell examples are fine
     from functools import partial
+    import logging
 
     if verify and len(objects) != 1:
         raise click.BadParameter("verification requires a single object")
 
-    results = zip(
-        objects,
-        map(
-            partial(identify_object, obj_type, follow_symlinks, exclude_patterns),
+    if recursive and not os.path.isdir(objects[0]):
+        recursive = False
+        logging.warning("recursive option disabled, input is not a directory object")
+
+    if recursive:
+        if verify:
+            raise click.BadParameter(
+                "verification of recursive object identification is not supported"
+            )
+
+        if obj_type not in ("auto", "directory"):
+            raise click.BadParameter(
+                "recursive identification is supported only for directories"
+            )
+
+        path = os.fsencode(objects[0])
+        dir_obj = model_of_dir(path, exclude_patterns)
+        for sub_obj in dir_obj.iter_tree():
+            path_name = "path" if "path" in sub_obj.data else "data"
+            path = os.fsdecode(sub_obj.data[path_name])
+            swhid = str(
+                CoreSWHID(
+                    object_type=ObjectType[sub_obj.object_type.upper()],
+                    object_id=sub_obj.hash,
+                )
+            )
+            msg = f"{swhid}\t{path}" if show_filename else f"{swhid}"
+            click.echo(msg)
+    else:
+        results = zip(
             objects,
-        ),
-    )
+            map(
+                partial(identify_object, obj_type, follow_symlinks, exclude_patterns),
+                objects,
+            ),
+        )
 
-    if verify:
-        swhid = next(results)[1]
-        if str(verify) == swhid:
-            click.echo("SWHID match: %s" % swhid)
-            sys.exit(0)
+        if verify:
+            swhid = next(results)[1]
+            if str(verify) == swhid:
+                click.echo("SWHID match: %s" % swhid)
+                sys.exit(0)
+            else:
+                click.echo("SWHID mismatch: %s != %s" % (verify, swhid))
+                sys.exit(1)
         else:
-            click.echo("SWHID mismatch: %s != %s" % (verify, swhid))
-            sys.exit(1)
-    else:
-        for (obj, swhid) in results:
-            msg = swhid
-            if show_filename:
-                msg = "%s\t%s" % (swhid, os.fsdecode(obj))
-            click.echo(msg)
+            for (obj, swhid) in results:
+                msg = swhid
+                if show_filename:
+                    msg = "%s\t%s" % (swhid, os.fsdecode(obj))
+                click.echo(msg)
 
 
 if __name__ == "__main__":
diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -429,7 +429,7 @@
                     if dir_filter(path, name, dirs[path].entries):
                         entries[name] = dirs[path]
 
-            dirs[root] = cls({"name": os.path.basename(root)})
+            dirs[root] = cls({"name": os.path.basename(root), "path": root})
             dirs[root].update(entries)
 
         return dirs[top_path]
diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py
--- a/swh/model/tests/swh_model_data.py
+++ b/swh/model/tests/swh_model_data.py
@@ -361,3 +361,19 @@
     "snapshot": SNAPSHOTS,
     "skipped_content": SKIPPED_CONTENTS,
 }
+
+SAMPLE_FOLDER_SWHIDS = [
+    "swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759",
+    "swh:1:cnt:7d5c08111e21c8a9f71540939998551683375fad",
+    "swh:1:cnt:68769579c3eaadbe555379b9c3538e6628bae1eb",
+    "swh:1:cnt:e86b45e538d9b6888c969c89fbd22a85aa0e0366",
+    "swh:1:dir:3c1f578394f4623f74a0ba7fe761729f59fc6ec4",
+    "swh:1:dir:c3020f6bf135a38c6df3afeb5fb38232c5e07087",
+    "swh:1:cnt:133693b125bad2b4ac318535b84901ebb1f6b638",
+    "swh:1:dir:4b825dc642cb6eb9a060e54bf8d69288fbee4904",
+    "swh:1:cnt:19102815663d23f8b75a47e7a01965dcdc96468c",
+    "swh:1:dir:2b41c40f0d1fbffcba12497db71fba83fcca96e5",
+    "swh:1:cnt:8185dfb2c0c2c597d16f75a8a0c37668567c3d7e",
+    "swh:1:cnt:7c4c57ba9ff496ad179b8f65b1d286edbda34c9a",
+    "swh:1:cnt:acac326ddd63b0bc70840659d4ac43619484e69f",
+]
diff --git a/swh/model/tests/test_cli.py b/swh/model/tests/test_cli.py
--- a/swh/model/tests/test_cli.py
+++ b/swh/model/tests/test_cli.py
@@ -15,6 +15,7 @@
 
 from swh.model import cli
 from swh.model.hashutil import hash_to_hex
+from swh.model.tests.swh_model_data import SAMPLE_FOLDER_SWHIDS
 from swh.model.tests.test_from_disk import DataMixin
 
 
@@ -177,3 +178,34 @@
         )
 
         self.assertSWHID(result, "swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759")
+
+    def test_recursive_directory(self):
+        self.make_from_tarball(self.tmpdir_name)
+        path = os.path.join(self.tmpdir_name, b"sample-folder")
+        result = self.runner.invoke(cli.identify, ["--recursive", path])
+        self.assertEqual(result.exit_code, 0, result.output)
+
+        result = result.output.split()
+        result_swhids = []
+        # get all SWHID from the result
+        for i in range(0, len(result)):
+            if i % 2 == 0:
+                result_swhids.append(result[i])
+
+        assert len(result_swhids) == len(SAMPLE_FOLDER_SWHIDS)
+        for swhid in SAMPLE_FOLDER_SWHIDS:
+            assert swhid in result_swhids
+
+    def test_recursive_directory_no_filename(self):
+        self.make_from_tarball(self.tmpdir_name)
+        path = os.path.join(self.tmpdir_name, b"sample-folder")
+        result = self.runner.invoke(
+            cli.identify, ["--recursive", "--no-filename", path]
+        )
+        self.assertEqual(result.exit_code, 0, result.output)
+
+        result_swhids = result.output.split()
+
+        assert len(result_swhids) == len(SAMPLE_FOLDER_SWHIDS)
+        for swhid in SAMPLE_FOLDER_SWHIDS:
+            assert swhid in result_swhids