diff --git a/swh/model/cli.py b/swh/model/cli.py
--- a/swh/model/cli.py
+++ b/swh/model/cli.py
@@ -45,6 +45,22 @@
     return pids.persistent_identifier(pids.CONTENT, object)
 
 
+def pid_of_file_content(data):
+    """Compute the persistent identifier of in-memory file content.
+
+    Args:
+        data (bytes): raw content, e.g. as read from standard input
+
+    Returns:
+        the content PID built by pids.persistent_identifier
+
+    """
+    # file modes are octal: 0o644 is rw-r--r--, whereas the decimal literal
+    # 644 is an unrelated bit pattern
+    object = Content.from_bytes(mode=0o644, data=data).get_data()
+    return pids.persistent_identifier(pids.CONTENT, object)
+
+
 def pid_of_dir(path):
     object = Directory.from_disk(path=path).get_data()
     return pids.persistent_identifier(pids.DIRECTORY, object)
@@ -85,7 +101,7 @@
 
 def identify_object(obj_type, follow_symlinks, obj):
     if obj_type == 'auto':
-        if os.path.isfile(obj):
+        if obj == '-' or os.path.isfile(obj):
             obj_type = 'content'
         elif os.path.isdir(obj):
             obj_type = 'directory'
@@ -101,7 +117,13 @@
 
     pid = None
 
-    if obj_type in ['content', 'directory']:
+    if obj == '-':
+        # NOTE(review): '-' is dispatched before obj_type is consulted, so
+        # an explicitly requested non-content type is ignored for stdin;
+        # confirm this is intended
+        content = sys.stdin.buffer.read()
+        pid = pid_of_file_content(content)
+    elif obj_type in ['content', 'directory']:
         path = obj.encode(sys.getfilesystemencoding())
         if follow_symlinks and os.path.islink(obj):
             path = os.path.realpath(obj)
@@ -134,7 +156,7 @@
               help='type of object to identify (default: auto)')
 @click.option('--verify', '-v', metavar='PID', type=PidParamType(),
               help='reference identifier to be compared with computed one')
-@click.argument('objects', nargs=-1, required=True)
+@click.argument('objects', nargs=-1)
 def identify(obj_type, verify, show_filename, follow_symlinks, objects):
     """Compute the Software Heritage persistent identifier (PID) for the given
     source code object(s).
@@ -163,6 +185,10 @@
       swh:1:snp:510aa88bdc517345d258c1fc2babcd0e1f905e93 helloworld.git
     """  # NoQA  # overlong lines in shell examples are fine
 
+    if not objects:
+        # no object given on the command line: fall back to standard input
+        objects = ['-']
+
     if verify and len(objects) != 1:
         raise click.BadParameter('verification requires a single object')
 
diff --git a/swh/model/tests/test_cli.py b/swh/model/tests/test_cli.py
--- a/swh/model/tests/test_cli.py
+++ b/swh/model/tests/test_cli.py
@@ -37,6 +37,15 @@
             self.assertPidOK(result,
                              'swh:1:cnt:' + hash_to_hex(content['sha1_git']))
 
+    def test_content_id_from_stdin(self):
+        """identify file content read from standard input"""
+        self.make_contents(self.tmpdir_name)
+        for content in self.contents.values():
+            result = self.runner.invoke(cli.identify,
+                                        input=content['data'])
+            self.assertPidOK(result,
+                             'swh:1:cnt:' + hash_to_hex(content['sha1_git']))
+
     def test_directory_id(self):
         """identify an entire directory"""
         self.make_from_tarball(self.tmpdir_name)