diff --git a/swh/model/cli.py b/swh/model/cli.py
--- a/swh/model/cli.py
+++ b/swh/model/cli.py
@@ -25,6 +25,7 @@
     # stub so that swh-identify can be used when swh-core isn't installed
     swh_cli_group = click  # type: ignore
 
+from swh.model.from_disk import Directory
 from swh.model.identifiers import CoreSWHID, ObjectType
 
 CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
@@ -74,13 +75,13 @@
     )
 
 
-def swhid_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> CoreSWHID:
-    from swh.model.from_disk import (
-        Directory,
-        accept_all_directories,
-        ignore_directories_patterns,
-    )
-    from swh.model.hashutil import hash_to_bytes
+def model_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> Directory:
+    """Load the directory at ``path`` from disk as a ``Directory`` model object.
+
+    ``exclude_patterns`` feeds ``ignore_directories_patterns`` to build the
+    directory filter; ``accept_all_directories`` is the alternative filter.
+    """
+    from swh.model.from_disk import accept_all_directories, ignore_directories_patterns
 
     dir_filter = (
         ignore_directories_patterns(path, exclude_patterns)
@@ -88,9 +89,17 @@
         else accept_all_directories
     )
 
-    object = Directory.from_disk(path=path, dir_filter=dir_filter).get_data()
+    return Directory.from_disk(path=path, dir_filter=dir_filter)
+
+
+def swhid_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> CoreSWHID:
+    """Compute the directory SWHID of the on-disk tree rooted at ``path``."""
+    from swh.model.hashutil import hash_to_bytes
+
+    obj = model_of_dir(path, exclude_patterns)
+
     return CoreSWHID(
-        object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(object["id"])
+        object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(obj.get_data()["id"])
     )
 
 
@@ -227,9 +236,18 @@
     type=CoreSWHIDParamType(),
     help="reference identifier to be compared with computed one",
 )
+@click.option(
+    "-r", "--recursive", is_flag=True, help="compute SWHID recursively",
+)
 @click.argument("objects", nargs=-1, required=True)
 def identify(
-    obj_type, verify, show_filename, follow_symlinks, objects, exclude_patterns,
+    obj_type,
+    verify,
+    show_filename,
+    follow_symlinks,
+    objects,
+    exclude_patterns,
+    recursive,
 ):
     """Compute the Software Heritage persistent identifier (SWHID) for the given
     source code object(s).
@@ -265,28 +283,55 @@
     if verify and len(objects) != 1:
         raise click.BadParameter("verification requires a single object")
 
-    results = zip(
-        objects,
-        map(
-            partial(identify_object, obj_type, follow_symlinks, exclude_patterns),
+    if verify and recursive:
+        raise click.BadParameter("recursive objects comparison is not supported")
+
+    if recursive and len(objects) != 1:
+        raise click.BadParameter("recursive identification requires a single object")
+
+    if recursive:
+        if not os.path.isdir(objects[0]):
+            raise click.BadOptionUsage(
+                "--recursive", "recursive option requires a directory object"
+            )
+
+        root_path = os.fsencode(objects[0])
+        dir_obj = model_of_dir(root_path, exclude_patterns)
+        for sub_obj in dir_obj.iter_tree():
+            # Nodes exposing a "path" entry are reported under it; the others
+            # fall back to their "data" entry.
+            path_name = "path" if "path" in sub_obj.data else "data"
+            sub_path = os.fsdecode(sub_obj.data[path_name])
+            swhid = str(
+                CoreSWHID(
+                    object_type=ObjectType[sub_obj.object_type.upper()],
+                    object_id=sub_obj.hash,
+                )
+            )
+            click.echo("%s\t%s" % (swhid, sub_path))
+    else:
+        results = zip(
             objects,
-        ),
-    )
+            map(
+                partial(identify_object, obj_type, follow_symlinks, exclude_patterns),
+                objects,
+            ),
+        )
 
-    if verify:
-        swhid = next(results)[1]
-        if str(verify) == swhid:
-            click.echo("SWHID match: %s" % swhid)
-            sys.exit(0)
+        if verify:
+            swhid = next(results)[1]
+            if str(verify) == swhid:
+                click.echo("SWHID match: %s" % swhid)
+                sys.exit(0)
+            else:
+                click.echo("SWHID mismatch: %s != %s" % (verify, swhid))
+                sys.exit(1)
         else:
-            click.echo("SWHID mismatch: %s != %s" % (verify, swhid))
-            sys.exit(1)
-    else:
-        for (obj, swhid) in results:
-            msg = swhid
-            if show_filename:
-                msg = "%s\t%s" % (swhid, os.fsdecode(obj))
-            click.echo(msg)
+            for (obj, swhid) in results:
+                msg = swhid
+                if show_filename:
+                    msg = "%s\t%s" % (swhid, os.fsdecode(obj))
+                click.echo(msg)
 
 
 if __name__ == "__main__":
diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -429,7 +429,7 @@
                     if dir_filter(path, name, dirs[path].entries):
                         entries[name] = dirs[path]
 
-            dirs[root] = cls({"name": os.path.basename(root)})
+            dirs[root] = cls({"name": os.path.basename(root), "path": root})
             dirs[root].update(entries)
 
         return dirs[top_path]
diff --git a/swh/model/tests/test_cli.py b/swh/model/tests/test_cli.py
--- a/swh/model/tests/test_cli.py
+++ b/swh/model/tests/test_cli.py
@@ -177,3 +177,31 @@
         )
 
         self.assertSWHID(result, "swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759")
+
+    def test_recursive_directory(self):
+        self.make_from_tarball(self.tmpdir_name)
+        path = os.path.join(self.tmpdir_name, b"sample-folder")
+        result = self.runner.invoke(cli.identify, ["--recursive", path])
+        self.assertEqual(result.exit_code, 0, result.output)
+
+        expected_swhids = [
+            "swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759",
+            "swh:1:cnt:7d5c08111e21c8a9f71540939998551683375fad",
+            "swh:1:cnt:68769579c3eaadbe555379b9c3538e6628bae1eb",
+            "swh:1:cnt:e86b45e538d9b6888c969c89fbd22a85aa0e0366",
+            "swh:1:dir:3c1f578394f4623f74a0ba7fe761729f59fc6ec4",
+            "swh:1:dir:c3020f6bf135a38c6df3afeb5fb38232c5e07087",
+            "swh:1:cnt:133693b125bad2b4ac318535b84901ebb1f6b638",
+            "swh:1:dir:4b825dc642cb6eb9a060e54bf8d69288fbee4904",
+            "swh:1:cnt:19102815663d23f8b75a47e7a01965dcdc96468c",
+            "swh:1:dir:2b41c40f0d1fbffcba12497db71fba83fcca96e5",
+            "swh:1:cnt:8185dfb2c0c2c597d16f75a8a0c37668567c3d7e",
+            "swh:1:cnt:7c4c57ba9ff496ad179b8f65b1d286edbda34c9a",
+            "swh:1:cnt:acac326ddd63b0bc70840659d4ac43619484e69f",
+        ]
+
+        # Each output line is "<SWHID>\t<path>"; keep only the SWHID column so
+        # that whitespace inside a path cannot be mistaken for a separator.
+        result_swhids = [line.split("\t", 1)[0] for line in result.output.splitlines()]
+
+        self.assertCountEqual(result_swhids, expected_swhids)