Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/bin/swh-hash-file b/bin/swh-hash-file
deleted file mode 100755
index c30de78..0000000
--- a/bin/swh-hash-file
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/python3
-
-# Copyright (C) 2018 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import sys
-
-from swh.model.from_disk import Content
-from swh.model.hashutil import hash_to_hex
-
-
-HASH_ALGO = 'sha1_git'
-
-
-def hash_file(fname):
- return hash_to_hex(Content.from_file(path=fname.encode()).hash)
-
-
-def main(fnames):
- for f in fnames:
- print(f, hash_file(f), sep='\t')
-
-
-if __name__ == '__main__':
- fnames = sys.argv[1:]
- if not fnames:
- print('Usage: swh-hash-file FILE...')
- sys.exit(2)
-
- main(fnames)
diff --git a/setup.py b/setup.py
index 232f3bc..dd32e44 100644
--- a/setup.py
+++ b/setup.py
@@ -1,37 +1,41 @@
import hashlib
from setuptools import setup, find_packages
def parse_requirements():
requirements = []
for reqf in ('requirements.txt', 'requirements-swh.txt'):
with open(reqf) as f:
for line in f.readlines():
line = line.strip()
if not line or line.startswith('#'):
continue
requirements.append(line)
return requirements
extra_requirements = []
pyblake2_hashes = {'blake2s256', 'blake2b512'}
if pyblake2_hashes - set(hashlib.algorithms_available):
extra_requirements.append('pyblake2')
setup(
name='swh.model',
description='Software Heritage data model',
author='Software Heritage developers',
author_email='swh-devel@inria.fr',
url='https://forge.softwareheritage.org/diffusion/DMOD/',
packages=find_packages(), # packages's modules
scripts=[], # scripts to package
install_requires=parse_requirements() + extra_requirements,
+ entry_points='''
+ [console_scripts]
+ swh-identify=swh.model.cli:identify
+ ''',
setup_requires=['vcversioner'],
vcversioner={},
include_package_data=True,
)
diff --git a/swh/model/cli.py b/swh/model/cli.py
new file mode 100644
index 0000000..9e0471a
--- /dev/null
+++ b/swh/model/cli.py
@@ -0,0 +1,96 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import click
+import os
+import sys
+
+from swh.model import identifiers as pids
+from swh.model.from_disk import Content, Directory
+
+
+class PidParamType(click.ParamType):
+    """Click parameter type for persistent identifiers given as strings.
+
+    The value is checked with ``pids.parse_persistent_identifier`` and, when
+    that call succeeds, handed back unchanged.
+    """
+
+    name = 'persistent identifier'
+
+    def convert(self, value, param, ctx):
+        """Return value unchanged if it parses as a PID, else fail."""
+        try:
+            # The parsed result is discarded: only whether the call raises
+            # matters here.
+            pids.parse_persistent_identifier(value)
+        except Exception:
+            # TODO catch more specific parsing exception. Requires
+            # https://forge.softwareheritage.org/T1104 to be addressed first.
+            self.fail('%s is not a valid PID' % value, param, ctx)
+        else:
+            return value  # kept as a string, callers need just that
+
+
+def pid_of_file(path):
+    """Return the persistent identifier of the content loaded from path."""
+    content_data = Content.from_file(path=path).get_data()
+    return pids.persistent_identifier(pids.CONTENT, content_data)
+
+
+def pid_of_dir(path):
+    """Return the persistent identifier of the directory loaded from path."""
+    directory_data = Directory.from_disk(path=path).get_data()
+    return pids.persistent_identifier(pids.DIRECTORY, directory_data)
+
+
+@click.command()
+@click.option('--type', '-t', default='auto',
+              type=click.Choice(['auto', 'content', 'directory']),
+              help='type of object to identify (default: auto)')
+@click.option('--verify', '-v', metavar='PID', type=PidParamType(),
+              help='reference identifier to be compared with computed one')
+@click.argument('object',
+                type=click.Path(exists=True, readable=True,
+                                allow_dash=True, path_type=bytes))
+def identify(type, verify, object):
+    """Compute the Software Heritage persistent identifier (PID) for a given
+    source code object.
+
+    For more details about Software Heritage PIDs see:
+
+    \b
+    https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html
+
+    \b
+    Examples:
+
+    \b
+    $ swh-identify /usr/src/linux/kernel/
+    swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab
+
+    \b
+    $ swh-identify /usr/src/linux/kernel/sched/deadline.c
+    swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82
+
+    """
+    # The docstring above is the text click shows for --help, so developer
+    # notes live in comments instead.
+    #
+    # Parameters (names are imposed by the click option/argument names):
+    #   type:   'auto', 'content' or 'directory'
+    #   verify: reference PID string to compare against, or None
+    #   object: filesystem path, as bytes (path_type=bytes)
+    #
+    # Exit status: with --verify, 0 on match and 1 on mismatch; without it
+    # the computed PID is printed and the command returns normally.
+    if type == 'auto':
+        if os.path.isfile(object):
+            type = 'content'
+        elif os.path.isdir(object):
+            type = 'directory'
+        else:
+            # Reachable despite path validation: special files (devices,
+            # FIFOs, sockets) exist and are readable, and '-' is let through
+            # by allow_dash. The path is bytes, so decode it to show the
+            # path itself in the message rather than a b'...' repr.
+            raise click.BadParameter('%s is neither a file nor a directory' %
+                                     os.fsdecode(object))
+
+    if type == 'content':
+        pid = pid_of_file(object)
+    elif type == 'directory':
+        pid = pid_of_dir(object)
+    else:  # shouldn't happen, due to option validation
+        raise click.BadParameter('invalid object type: ' + type)
+
+    if verify:
+        if verify == pid:
+            click.echo('PID match: %s' % pid)
+            sys.exit(0)
+        else:
+            click.echo('PID mismatch: %s != %s' % (verify, pid))
+            sys.exit(1)
+    else:
+        click.echo(pid)
+
+
+# Run the identify command when this module is executed as a script.
+if __name__ == '__main__':
+ identify()
diff --git a/swh/model/tests/test_cli.py b/swh/model/tests/test_cli.py
new file mode 100644
index 0000000..5612c9c
--- /dev/null
+++ b/swh/model/tests/test_cli.py
@@ -0,0 +1,71 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+import tempfile
+import unittest
+
+from click.testing import CliRunner
+
+from swh.model import cli
+from swh.model.tests.test_from_disk import DataMixin
+from swh.model.hashutil import hash_to_hex
+
+
+class TestIdentify(DataMixin, unittest.TestCase):
+    """Exercise the identify command through click's CliRunner."""
+
+    def setUp(self):
+        super().setUp()
+        self.runner = CliRunner()
+
+    def test_content_id(self):
+        # each sample content must be reported under its sha1_git
+        self.make_contents(self.tmpdir_name)
+        for name, data in self.contents.items():
+            expected_pid = 'swh:1:cnt:' + hash_to_hex(data['sha1_git'])
+            args = ['--type', 'content',
+                    os.path.join(self.tmpdir_name, name)]
+            outcome = self.runner.invoke(cli.identify, args)
+
+            self.assertEqual(outcome.exit_code, 0)
+            self.assertEqual(outcome.output.rstrip(), expected_pid)
+
+    def test_directory_id(self):
+        # the extracted sample tarball has a known directory identifier
+        self.make_from_tarball(self.tmpdir_name)
+        args = ['--type', 'directory',
+                os.path.join(self.tmpdir_name, b'sample-folder')]
+        outcome = self.runner.invoke(cli.identify, args)
+
+        self.assertEqual(outcome.exit_code, 0)
+        self.assertEqual(outcome.output.rstrip(),
+                         'swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759')
+
+    def test_auto_id(self):
+        # without --type, a regular file is identified as content ...
+        with tempfile.NamedTemporaryFile(prefix='swh.model.cli') as f:
+            outcome = self.runner.invoke(cli.identify, [f.name])
+            self.assertEqual(outcome.exit_code, 0)
+            self.assertRegex(outcome.output, r'^swh:\d+:cnt:')
+
+        # ... and a directory as a directory
+        with tempfile.TemporaryDirectory(prefix='swh.model.cli') as dirname:
+            outcome = self.runner.invoke(cli.identify, [dirname])
+            self.assertEqual(outcome.exit_code, 0)
+            self.assertRegex(outcome.output, r'^swh:\d+:dir:')
+
+    def test_verify_content(self):
+        self.make_contents(self.tmpdir_name)
+        for name, data in self.contents.items():
+            target = os.path.join(self.tmpdir_name, name)
+            args = ['--verify',
+                    'swh:1:cnt:' + hash_to_hex(data['sha1_git']),
+                    target]
+
+            # match: the file is untouched
+            outcome = self.runner.invoke(cli.identify, args)
+            self.assertEqual(outcome.exit_code, 0)
+
+            # mismatch: appended data changes the content
+            with open(target, 'a') as f:
+                f.write('trailing garbage to make verification fail')
+            outcome = self.runner.invoke(cli.identify, args)
+            self.assertEqual(outcome.exit_code, 1)

File Metadata

Mime Type
text/x-diff
Expires
Jul 4 2025, 8:13 AM (9 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3272652

Event Timeline