def parse_requirements(filenames=('requirements.txt', 'requirements-swh.txt')):
    """Collect requirement specifiers from pip-style requirements files.

    Args:
        filenames: paths of the requirements files to read, in order.
            Relative paths are resolved against the current working
            directory. Defaults to the two files this package ships.

    Returns:
        A list of requirement strings, in file order, with blank lines,
        comment lines and trailing comments removed.

    Files that do not exist are skipped, so an incomplete source tree
    (e.g. one that lacks ``requirements-swh.txt``) yields fewer
    requirements instead of an exception.
    """
    # Local import: the surrounding module only imports hashlib/setuptools.
    import re

    # Same rule pip applies to requirements files: a comment starts at a
    # '#' that is at the beginning of the line or preceded by whitespace.
    # A '#' glued to other text (e.g. a URL fragment '#egg=name') is kept.
    comment = re.compile(r'(^|\s+)#.*$')

    requirements = []
    for reqf in filenames:
        try:
            f = open(reqf, encoding='utf-8')
        except FileNotFoundError:
            continue
        with f:
            # Iterate lazily rather than materialising every line first.
            for line in f:
                line = comment.sub('', line).strip()
                if line:
                    requirements.append(line)
    return requirements
class PidParamType(click.ParamType):
    """Click parameter type for Software Heritage persistent identifiers.

    The value is only validated; it is handed back unchanged as a string
    because callers compare it textually with a computed identifier.
    """
    name = 'persistent identifier'

    def convert(self, value, param, ctx):
        """Return ``value`` unchanged if it parses as a PID.

        Reports the problem through ``self.fail`` (click's usage-error
        mechanism) when parsing raises.
        """
        try:
            pids.parse_persistent_identifier(value)
        except Exception:
            # TODO catch more specific parsing exception. Requires
            # https://forge.softwareheritage.org/T1104 to be addressed first.
            self.fail('%s is not a valid PID' % value, param, ctx)
        return value  # return as string, as we need just that


def pid_of_file(path):
    """Return the persistent identifier of the file content at ``path``."""
    data = Content.from_file(path=path).get_data()
    return pids.persistent_identifier(pids.CONTENT, data)


def pid_of_dir(path):
    """Return the persistent identifier of the directory at ``path``."""
    data = Directory.from_disk(path=path).get_data()
    return pids.persistent_identifier(pids.DIRECTORY, data)


# NOTE: the parameter names `type` and `object` shadow builtins, but click
# binds options/arguments to the callback by these names, so they are kept
# and immediately aliased to neutral local names inside the function.
#
# `allow_dash` is deliberately not set on the path argument: with it click
# skips the existence check for "-", yet nothing below reads standard
# input, so "-" would be treated as a regular (missing) path further down.
#
# The docstring doubles as the --help text. The "\b" markers must stay in a
# non-raw string: click looks for the actual backspace character to disable
# re-wrapping of the paragraph that follows.
@click.command()
@click.option('--type', '-t', default='auto',
              type=click.Choice(['auto', 'content', 'directory']),
              help='type of object to identify (default: auto)')
@click.option('--verify', '-v', metavar='PID', type=PidParamType(),
              help='reference identifier to be compared with computed one')
@click.argument('object',
                type=click.Path(exists=True, readable=True,
                                path_type=bytes))
def identify(type, verify, object):
    """Compute the Software Heritage persistent identifier (PID) for a given
    source code object.

    For more details about Software Heritage PIDs see:

    \b
    https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html

    \b
    Examples:

    \b
      $ swh-identify /usr/src/linux/kernel/
      swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab

    \b
      $ swh-identify /usr/src/linux/kernel/sched/deadline.c
      swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82

    """
    kind, path = type, object
    # The path arrives as bytes (path_type=bytes); decode it for messages so
    # users see "/some/path" rather than "b'/some/path'".
    shown = os.fsdecode(path)

    if kind == 'auto':
        if os.path.isfile(path):
            kind = 'content'
        elif os.path.isdir(path):
            kind = 'directory'
        else:  # shouldn't happen, due to path validation
            raise click.BadParameter('%s is neither a file nor a directory' %
                                     shown)
    elif kind == 'directory' and not os.path.isdir(path):
        # An explicit --type must agree with what is on disk; report it as a
        # usage error instead of passing the path to the directory loader.
        raise click.BadParameter('%s is not a directory' % shown)
    elif (kind == 'content' and os.path.isdir(path)
          and not os.path.islink(path)):
        # Symlinks are let through untouched so their handling stays with
        # the content loader, exactly as before.
        raise click.BadParameter('%s is a directory, not a file' % shown)

    if kind == 'content':
        pid = pid_of_file(path)
    elif kind == 'directory':
        pid = pid_of_dir(path)
    else:  # shouldn't happen, due to option validation
        raise click.BadParameter('invalid object type: ' + kind)

    if not verify:
        click.echo(pid)
        return

    # Verification mode: the exit status carries the verdict (0 = match,
    # 1 = mismatch) so the command can be used in shell conditionals.
    if verify == pid:
        click.echo('PID match: %s' % pid)
        sys.exit(0)
    click.echo('PID mismatch: %s != %s' % (verify, pid))
    sys.exit(1)


if __name__ == '__main__':
    identify()
class TestIdentify(DataMixin, unittest.TestCase):
    """Exercise the ``identify`` click command through click's CliRunner.

    Fixture helpers (``tmpdir_name``, ``contents``, ``make_contents``,
    ``make_from_tarball``) are inherited from ``DataMixin``.
    """

    def setUp(self):
        super().setUp()
        self.runner = CliRunner()

    def _identify(self, *args):
        """Invoke the command with ``args`` and return click's result."""
        return self.runner.invoke(cli.identify, list(args))

    def test_content_id(self):
        """Each sample file is identified by its sha1_git as a content."""
        self.make_contents(self.tmpdir_name)
        for name, data in self.contents.items():
            target = os.path.join(self.tmpdir_name, name)
            outcome = self._identify('--type', 'content', target)

            self.assertEqual(outcome.exit_code, 0)
            self.assertEqual(outcome.output.rstrip(),
                             'swh:1:cnt:' + hash_to_hex(data['sha1_git']))

    def test_directory_id(self):
        """The sample folder from the tarball has a known directory PID."""
        self.make_from_tarball(self.tmpdir_name)
        target = os.path.join(self.tmpdir_name, b'sample-folder')
        outcome = self._identify('--type', 'directory', target)

        self.assertEqual(outcome.exit_code, 0)
        self.assertEqual(outcome.output.rstrip(),
                         'swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759')

    def test_auto_id(self):
        """Without --type, files map to cnt PIDs and directories to dir."""
        with tempfile.NamedTemporaryFile(prefix='swh.model.cli') as tmp_file:
            outcome = self._identify(tmp_file.name)
            self.assertEqual(outcome.exit_code, 0)
            self.assertRegex(outcome.output, r'^swh:\d+:cnt:')

        with tempfile.TemporaryDirectory(prefix='swh.model.cli') as tmp_dir:
            outcome = self._identify(tmp_dir)
            self.assertEqual(outcome.exit_code, 0)
            self.assertRegex(outcome.output, r'^swh:\d+:dir:')

    def test_verify_content(self):
        """--verify exits 0 on a matching PID and 1 once the file changes."""
        self.make_contents(self.tmpdir_name)
        for name, data in self.contents.items():
            reference = 'swh:1:cnt:' + hash_to_hex(data['sha1_git'])
            target = os.path.join(self.tmpdir_name, name)

            # match
            outcome = self._identify('--verify', reference, target)
            self.assertEqual(outcome.exit_code, 0)

            # mismatch: alter the file so its identifier no longer matches
            with open(target, 'a') as handle:
                handle.write('trailing garbage to make verification fail')
            outcome = self._identify('--verify', reference, target)
            self.assertEqual(outcome.exit_code, 1)