Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9337670
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
8 KB
Subscribers
None
View Options
diff --git a/bin/swh-hash-file b/bin/swh-hash-file
deleted file mode 100755
index c30de78..0000000
--- a/bin/swh-hash-file
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/python3
-
-# Copyright (C) 2018 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import sys
-
-from swh.model.from_disk import Content
-from swh.model.hashutil import hash_to_hex
-
-
-HASH_ALGO = 'sha1_git'
-
-
-def hash_file(fname):
- return hash_to_hex(Content.from_file(path=fname.encode()).hash)
-
-
-def main(fnames):
- for f in fnames:
- print(f, hash_file(f), sep='\t')
-
-
-if __name__ == '__main__':
- fnames = sys.argv[1:]
- if not fnames:
- print('Usage: swh-hash-file FILE...')
- sys.exit(2)
-
- main(fnames)
diff --git a/setup.py b/setup.py
index 232f3bc..dd32e44 100644
--- a/setup.py
+++ b/setup.py
@@ -1,37 +1,41 @@
import hashlib
from setuptools import setup, find_packages
def parse_requirements():
requirements = []
for reqf in ('requirements.txt', 'requirements-swh.txt'):
with open(reqf) as f:
for line in f.readlines():
line = line.strip()
if not line or line.startswith('#'):
continue
requirements.append(line)
return requirements
extra_requirements = []
pyblake2_hashes = {'blake2s256', 'blake2b512'}
if pyblake2_hashes - set(hashlib.algorithms_available):
extra_requirements.append('pyblake2')
setup(
name='swh.model',
description='Software Heritage data model',
author='Software Heritage developers',
author_email='swh-devel@inria.fr',
url='https://forge.softwareheritage.org/diffusion/DMOD/',
packages=find_packages(), # packages's modules
scripts=[], # scripts to package
install_requires=parse_requirements() + extra_requirements,
+ entry_points='''
+ [console_scripts]
+ swh-identify=swh.model.cli:identify
+ ''',
setup_requires=['vcversioner'],
vcversioner={},
include_package_data=True,
)
diff --git a/swh/model/cli.py b/swh/model/cli.py
new file mode 100644
index 0000000..9e0471a
--- /dev/null
+++ b/swh/model/cli.py
@@ -0,0 +1,96 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import click
+import os
+import sys
+
+from swh.model import identifiers as pids
+from swh.model.from_disk import Content, Directory
+
+
+class PidParamType(click.ParamType):
+ name = 'persistent identifier'
+
+ def convert(self, value, param, ctx):
+ try:
+ _parsed_pid = pids.parse_persistent_identifier(value) # noqa
+ return value # return as string, as we need just that
+ except Exception:
+ # TODO catch more specific parsing exception. Requires
+ # https://forge.softwareheritage.org/T1104 to be addressed first.
+ self.fail('%s is not a valid PID' % value, param, ctx)
+
+
+def pid_of_file(path):
+ object = Content.from_file(path=path).get_data()
+ return pids.persistent_identifier(pids.CONTENT, object)
+
+
+def pid_of_dir(path):
+ object = Directory.from_disk(path=path).get_data()
+ return pids.persistent_identifier(pids.DIRECTORY, object)
+
+
+@click.command()
+@click.option('--type', '-t', default='auto',
+ type=click.Choice(['auto', 'content', 'directory']),
+ help='type of object to identify (default: auto)')
+@click.option('--verify', '-v', metavar='PID', type=PidParamType(),
+ help='reference identifier to be compared with computed one')
+@click.argument('object',
+ type=click.Path(exists=True, readable=True,
+ allow_dash=True, path_type=bytes))
+def identify(type, verify, object):
+ """Compute the Software Heritage persistent identifier (PID) for a given
+ source code object.
+
+ For more details about Software Heritage PIDs see:
+
+ \b
+ https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html
+
+ \b
+ Examples:
+
+ \b
+ $ swh-identify /usr/src/linux/kernel/
+ swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab
+
+ \b
+ $ swh-identify /usr/src/linux/kernel/sched/deadline.c
+ swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82
+
+ """
+ if type == 'auto':
+ if os.path.isfile(object):
+ type = 'content'
+ elif os.path.isdir(object):
+ type = 'directory'
+ else: # shouldn't happen, due to path validation
+ raise click.BadParameter('%s is neither a file nor a directory' %
+ object)
+
+ pid = None
+ if type == 'content':
+ pid = pid_of_file(object)
+ elif type == 'directory':
+ pid = pid_of_dir(object)
+ else: # shouldn't happen, due to option validation
+ raise click.BadParameter('invalid object type: ' + type)
+
+ if verify:
+ if verify == pid:
+ click.echo('PID match: %s' % pid)
+ sys.exit(0)
+ else:
+ click.echo('PID mismatch: %s != %s' % (verify, pid))
+ sys.exit(1)
+ else:
+ click.echo(pid)
+
+
+if __name__ == '__main__':
+ identify()
diff --git a/swh/model/tests/test_cli.py b/swh/model/tests/test_cli.py
new file mode 100644
index 0000000..5612c9c
--- /dev/null
+++ b/swh/model/tests/test_cli.py
@@ -0,0 +1,71 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+import tempfile
+import unittest
+
+from click.testing import CliRunner
+
+from swh.model import cli
+from swh.model.tests.test_from_disk import DataMixin
+from swh.model.hashutil import hash_to_hex
+
+
+class TestIdentify(DataMixin, unittest.TestCase):
+
+ def setUp(self):
+ super().setUp()
+ self.runner = CliRunner()
+
+ def test_content_id(self):
+ self.make_contents(self.tmpdir_name)
+ for filename, content in self.contents.items():
+ path = os.path.join(self.tmpdir_name, filename)
+ result = self.runner.invoke(cli.identify,
+ ['--type', 'content', path])
+
+ self.assertEqual(result.exit_code, 0)
+ self.assertEqual(result.output.rstrip(),
+ 'swh:1:cnt:' + hash_to_hex(content['sha1_git']))
+
+ def test_directory_id(self):
+ self.make_from_tarball(self.tmpdir_name)
+ path = os.path.join(self.tmpdir_name, b'sample-folder')
+ result = self.runner.invoke(cli.identify,
+ ['--type', 'directory', path])
+
+ self.assertEqual(result.exit_code, 0)
+ self.assertEqual(result.output.rstrip(),
+ 'swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759')
+
+ def test_auto_id(self):
+ with tempfile.NamedTemporaryFile(prefix='swh.model.cli') as f:
+ result = self.runner.invoke(cli.identify, [f.name])
+ self.assertEqual(result.exit_code, 0)
+ self.assertRegex(result.output, r'^swh:\d+:cnt:')
+
+ with tempfile.TemporaryDirectory(prefix='swh.model.cli') as dirname:
+ result = self.runner.invoke(cli.identify, [dirname])
+ self.assertEqual(result.exit_code, 0)
+ self.assertRegex(result.output, r'^swh:\d+:dir:')
+
+ def test_verify_content(self):
+ self.make_contents(self.tmpdir_name)
+ for filename, content in self.contents.items():
+ expected_id = 'swh:1:cnt:' + hash_to_hex(content['sha1_git'])
+
+ # match
+ path = os.path.join(self.tmpdir_name, filename)
+ result = self.runner.invoke(cli.identify,
+ ['--verify', expected_id, path])
+ self.assertEqual(result.exit_code, 0)
+
+ # mismatch
+ with open(path, 'a') as f:
+ f.write('trailing garbage to make verification fail')
+ result = self.runner.invoke(cli.identify,
+ ['--verify', expected_id, path])
+ self.assertEqual(result.exit_code, 1)
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jul 4 2025, 8:13 AM (9 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3272652
Attached To
rDMOD Data model
Event Timeline
Log In to Comment