diff --git a/PKG-INFO b/PKG-INFO index 92f2105..d31bdb8 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,38 +1,38 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.0.35 +Version: 0.0.38 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Source, https://forge.softwareheritage.org/source/swh-model Description: swh-model ========= Implementation of the Data model of the Software Heritage project, used to archive source code artifacts. This module defines the notion of Persistent Identifier (PID) and provides tools to compute them: ```sh $ swh-identify fork.c kmod.c sched/deadline.c swh:1:cnt:2e391c754ae730bd2d8520c2ab497c403220c6e3 fork.c swh:1:cnt:0277d1216f80ae1adeed84a686ed34c9b2931fc2 kmod.c swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82 sched/deadline.c $ swh-identify --no-filename /usr/src/linux/kernel/ swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab ``` Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/debian/changelog b/debian/changelog index 0e157bd..c7b9923 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,308 +1,332 @@ -swh-model (0.0.35-1~swh2~bpo9+1) stretch-swh; urgency=medium +swh-model (0.0.38-1~swh1) unstable-swh; urgency=medium - * Rebuild for stretch-swh + * New upstream release 0.0.38 - (tagged by Valentin Lorentz + on 2019-06-18 13:40:20 +0200) + * Upstream changes: - Remove dependency on swh-core. 
- This is + a fix to workaround pip's inability to correctly solve - extra + requirements (swh-model depends on swh-core[], but if other - + packages depend on swh-model and swh-core[http], the 'http' extra + - does not always get installed). - -- Software Heritage autobuilder (on jenkins-debian1) Thu, 18 Apr 2019 16:32:25 +0000 + -- Software Heritage autobuilder (on jenkins-debian1) Tue, 18 Jun 2019 11:50:14 +0000 + +swh-model (0.0.37-1~swh1) unstable-swh; urgency=medium + + * New upstream release 0.0.37 - (tagged by David Douard + on 2019-05-15 15:44:21 +0200) + * Upstream changes: - cli: add support for --help on the + 'identify' cli tool + + -- Software Heritage autobuilder (on jenkins-debian1) Thu, 13 Jun 2019 14:40:16 +0000 + +swh-model (0.0.36-1~swh1) unstable-swh; urgency=medium + + * New upstream release 0.0.36 - (tagged by Valentin Lorentz + on 2019-04-26 13:33:29 +0200) + * Upstream changes: - Prevent from_dict() from changing its input + dict. + + -- Software Heritage autobuilder (on jenkins-debian1) Fri, 26 Apr 2019 11:57:45 +0000 swh-model (0.0.35-1~swh2) unstable-swh; urgency=medium * Remove hypothesis directory -- Nicolas Dandrimont Thu, 18 Apr 2019 18:27:33 +0200 swh-model (0.0.35-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.35 - (tagged by Nicolas Dandrimont on 2019-04-11 12:05:11 +0200) * Upstream changes: - Release swh.model v0.0.35 - Fix hypothesis strategies to work in non-UTC timezones -- Software Heritage autobuilder (on jenkins-debian1) Thu, 11 Apr 2019 10:08:14 +0000 swh-model (0.0.34-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.34 - (tagged by Valentin Lorentz on 2019-04-09 18:30:50 +0200) * Upstream changes: - Limit Content.length to what the pgsql storage supports. 
-- Software Heritage autobuilder (on jenkins-debian1) Wed, 10 Apr 2019 07:45:31 +0000 swh-model (0.0.33-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.33 - (tagged by Valentin Lorentz on 2019-04-08 21:46:28 +0200) * Upstream changes: - Tune the model generation to work with the pgsql storage. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 09 Apr 2019 15:11:51 +0000 swh-model (0.0.32-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.32 - (tagged by Valentin Lorentz on 2019-04-05 19:15:16 +0200) * Upstream changes: - Add a model based using 'attrs' and Hypothesis strategies to generate it. -- Software Heritage autobuilder (on jenkins-debian1) Mon, 08 Apr 2019 12:57:45 +0000 swh-model (0.0.31-1~swh2) unstable-swh; urgency=medium * Add new dependencies on python3-attr and python3-hypothesis -- Nicolas Dandrimont Mon, 08 Apr 2019 14:55:50 +0200 swh-model (0.0.31-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.31 - (tagged by Valentin Lorentz on 2019-04-04 20:46:15 +0200) * Upstream changes: - Make snapshot_identifier add the cycle to the exception's arguments when it detects one. 
-- Software Heritage autobuilder (on jenkins-debian1) Fri, 05 Apr 2019 09:07:35 +0000 swh-model (0.0.30-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.30 - (tagged by David Douard on 2019-01-08 12:28:35 +0100) * Upstream changes: - v0.0.30 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 09 Jan 2019 17:31:53 +0000 swh-model (0.0.29-1~swh1) unstable-swh; urgency=medium * Release swh.model v0.0.29 * Reference iPRES paper in PID documentation * Remove deprecated swh.model.hashutil.hash_* functions * Split debian packaging to separate branch -- Nicolas Dandrimont Wed, 31 Oct 2018 18:26:32 +0100 swh-model (0.0.28-1~swh1) unstable-swh; urgency=medium * v0.0.28 * setup: prepare for pypi upload * tests: Initialize tox use * tests: Migrate to pytest * docs: Improve basic repository information * docs: document PID resolution possibilities other than Web UI / * hashutil: Migrate towards MultiHash api -- Antoine R. Dumont (@ardumont) Tue, 23 Oct 2018 16:24:21 +0200 swh-model (0.0.27-1~swh1) unstable-swh; urgency=medium * v0.0.27 * Refactor: Add MultiHash class to improve hash computations * swh.model.hashutil: Improve and clarify docstrings * swh.model.hashutil: Mark hash_* function as deprecated -- Antoine R. Dumont (@ardumont) Mon, 17 Sep 2018 12:07:59 +0200 swh-model (0.0.26-1~swh1) unstable-swh; urgency=medium * v0.0.26 * swh.model.identifiers: Open metadata in persistent_identifier method * refactor CLI tests to avoid duplicate assertion pairs * swh-identify: follow symlinks for CLI arguments (by default) * cli.py: prefer os.fsdecode() over manual fiddling with locale.getpref... * swh-identify: add support for passing multiple CLI arguments -- Antoine R. 
Dumont (@ardumont) Mon, 23 Jul 2018 14:29:54 +0200 swh-model (0.0.25-1~swh1) unstable-swh; urgency=medium * version 0.0.25 -- Antoine Lambert Fri, 29 Jun 2018 11:49:25 +0200 swh-model (0.0.24-1~swh1) unstable-swh; urgency=medium * v0.0.24 * swh.model.cli: Catch specific exception during identifiers check * identifiers: Validate input * identifiers: Raise when error during parsing persistent identifiers * Update blake2 support to be less Debian-specific * add swh-identify CLI tool to compute persistent identifiers * docs: Update high-level documentation (Merkle DAG description, * contextual information for persistent IDs, etc...) -- Antoine R. Dumont (@ardumont) Fri, 22 Jun 2018 15:38:32 +0200 swh-model (0.0.23-1~swh1) unstable-swh; urgency=medium * version 0.0.23 -- Antoine Lambert Tue, 29 May 2018 14:08:45 +0200 swh-model (0.0.22-1~swh1) unstable-swh; urgency=medium * version 0.0.22 -- Antoine Pietri Tue, 30 Jan 2018 18:22:42 +0100 swh-model (0.0.21-1~swh1) unstable-swh; urgency=medium * v0.0.21 * swh.model.identifiers: Add persistent identifier function * docs: document the naming scheme for persistent identifiers * bin/swh-hash-file: new binary to compute SWH-style content identifiers -- Antoine R. Dumont (@ardumont) Wed, 17 Jan 2018 11:06:33 +0100 swh-model (0.0.20-1~swh1) unstable-swh; urgency=medium * v0.0.20 * swh.model.hashutil.hash_data: Optionally integrate length in result * hashutil: add `snapshot` object type for git hashes * docs: add absolute anchor to documentation index -- Antoine R. Dumont (@ardumont) Wed, 20 Dec 2017 10:47:10 +0100 swh-model (0.0.19-1~swh1) unstable-swh; urgency=medium * Release swh.model version 0.0.19 * Update packaging runes -- Nicolas Dandrimont Thu, 12 Oct 2017 18:07:59 +0200 swh-model (0.0.18-1~swh1) unstable-swh; urgency=medium * Release swh.model v0.0.18 * Replace swh.model.git with swh.model.from_disk (T709). 
* Clean up documentation -- Nicolas Dandrimont Thu, 05 Oct 2017 20:48:29 +0200 swh-model (0.0.17-1~swh1) unstable-swh; urgency=medium * Release swh.model v0.0.17 * Clean up pyblake2 requirement for Python 3.5+ -- Nicolas Dandrimont Mon, 26 Jun 2017 14:41:49 +0200 swh-model (0.0.16-1~swh1) unstable-swh; urgency=medium * Release swh.model v0.0.16 * Make sure we generate proper permissions in directories -- Nicolas Dandrimont Fri, 07 Apr 2017 14:32:34 +0200 swh-model (0.0.15-1~swh1) unstable-swh; urgency=medium * v0.0.15 * Add possibility to compute new blake2 hashes * Add blake2s256 hash as default new hash computation algorithm -- Antoine R. Dumont (@ardumont) Fri, 24 Mar 2017 16:32:35 +0100 swh-model (0.0.14-1~swh1) unstable-swh; urgency=medium * v0.0.14 * Migrate functions from swh.core.hashutil to swh.model.hashutil -- Antoine R. Dumont (@ardumont) Wed, 15 Mar 2017 16:00:56 +0100 swh-model (0.0.13-1~swh1) unstable-swh; urgency=medium * Release swh.model v0.0.13 * Timestamps are now fully integer values -- Nicolas Dandrimont Tue, 14 Feb 2017 19:32:24 +0100 swh-model (0.0.12-1~swh1) unstable-swh; urgency=medium * Release swh.model v0.0.12 * Add more tests to git tree hash computations -- Nicolas Dandrimont Tue, 14 Jun 2016 17:08:20 +0200 swh-model (0.0.11-1~swh1) unstable-swh; urgency=medium * v0.0.11 * Open git.children_hashes api * Rename git.walk_and_compute_sha1_from_directory_2 to git.compute_hashes_from_directory * Remove dead code -- Antoine R. Dumont (@ardumont) Sat, 11 Jun 2016 02:23:19 +0200 swh-model (0.0.10-1~swh1) unstable-swh; urgency=medium * v0.0.10 * Add objects_per_type api * Open a new walk_and_compute_sha1_from_directory_2 api * Improve internal api regarding directory and tree hash computations -- Antoine R. Dumont (@ardumont) Wed, 08 Jun 2016 15:54:59 +0200 swh-model (0.0.9-1~swh1) unstable-swh; urgency=medium * v0.0.9 * Add coverage on edge case * Optimize git hash walk -- Antoine R. 
Dumont (@ardumont) Thu, 26 May 2016 12:56:17 +0200 swh-model (0.0.8-1~swh1) unstable-swh; urgency=medium * v0.0.8 * Add coverage on edge case * Optimize git hash walk -- Antoine R. Dumont (@ardumont) Thu, 26 May 2016 12:33:59 +0200 swh-model (0.0.7-1~swh1) unstable-swh; urgency=medium * v0.0.7 * Improve corner case policy about walking and computing hash tree (+ update) -- Antoine R. Dumont (@ardumont) Wed, 25 May 2016 23:47:19 +0200 swh-model (0.0.6-1~swh1) unstable-swh; urgency=medium * v0.0.6 * Improve corner case on git hash memory update function * debian packaging: Ignore fs tests for packaging -- Antoine R. Dumont (@ardumont) Tue, 24 May 2016 17:01:06 +0200 swh-model (0.0.5-1~swh1) unstable-swh; urgency=medium * v0.0.5 * Add update git hash computation from existing data * Add revision identifier data for hash identifier computation (extra- headers) -- Antoine R. Dumont (@ardumont) Fri, 15 Apr 2016 12:51:21 +0200 swh-model (0.0.4-1~swh1) unstable-swh; urgency=medium * v0.0.4 * Migrate swh.loader.dir.git module to swh.model.git -- Antoine R. Dumont (@ardumont) Mon, 21 Mar 2016 15:20:28 +0100 swh-model (0.0.3-1~swh1) unstable-swh; urgency=medium * v0.0.3 * Release name is now in bytes -- Antoine R. Dumont (@ardumont) Wed, 27 Jan 2016 15:50:08 +0100 swh-model (0.0.2-1~swh1) unstable-swh; urgency=medium * Prepare release of v0.0.2 * Import the rest of swh.core.hashutil -- Nicolas Dandrimont Wed, 16 Dec 2015 18:30:12 +0100 swh-model (0.0.1-1~swh1) unstable-swh; urgency=medium * Initial release * Prepare swh.model release v0.0.1 -- Nicolas Dandrimont Mon, 07 Dec 2015 18:26:58 +0100 diff --git a/requirements-swh.txt b/requirements-swh.txt deleted file mode 100644 index e69de29..0000000 diff --git a/requirements.txt b/requirements.txt index cd97184..5962345 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,8 @@ # Add here external Python modules dependencies, one per line. Module names # should match https://pypi.python.org/pypi names. 
For the full spec or # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html vcversioner Click attrs hypothesis +python-dateutil diff --git a/setup.py b/setup.py index c28e4bf..0e24d22 100755 --- a/setup.py +++ b/setup.py @@ -1,89 +1,91 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from setuptools import setup, find_packages import hashlib from os import path from io import open here = path.abspath(path.dirname(__file__)) # Get the long description from the README file with open(path.join(here, 'README.md'), encoding='utf-8') as f: long_description = f.read() def parse_requirements(name=None): if name: reqf = 'requirements-%s.txt' % name else: reqf = 'requirements.txt' requirements = [] if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith('#'): continue requirements.append(line) return requirements blake2_requirements = [] pyblake2_hash_sets = [ # Built-in implementation in Python 3.6+ {'blake2s', 'blake2b'}, # Potentially shipped by OpenSSL 1.1 (e.g. Python 3.5 in Debian stretch # has these) {'blake2s256', 'blake2b512'}, ] for pyblake2_hashes in pyblake2_hash_sets: if not pyblake2_hashes - set(hashlib.algorithms_available): # The required blake2 hashes have been found break else: # None of the possible sets of blake2 hashes are available. 
# use pyblake2 instead blake2_requirements.append('pyblake2') setup( name='swh.model', description='Software Heritage data model', long_description=long_description, long_description_content_type='text/markdown', author='Software Heritage developers', author_email='swh-devel@inria.fr', url='https://forge.softwareheritage.org/diffusion/DMOD/', packages=find_packages(), setup_requires=['vcversioner'], install_requires=(parse_requirements() + parse_requirements('swh') + blake2_requirements), extras_require={'testing': parse_requirements('test')}, vcversioner={}, include_package_data=True, entry_points=''' [console_scripts] swh-identify=swh.model.cli:identify + [swh.cli.subcommands] + identify=swh.model.cli:identify ''', classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 5 - Production/Stable", ], project_urls={ 'Bug Reports': 'https://forge.softwareheritage.org/maniphest', 'Funding': 'https://www.softwareheritage.org/donate', 'Source': 'https://forge.softwareheritage.org/source/swh-model', }, ) diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index 92f2105..d31bdb8 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,38 +1,38 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.0.35 +Version: 0.0.38 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Source, https://forge.softwareheritage.org/source/swh-model Description: swh-model ========= Implementation of the Data model of the Software Heritage project, used to archive source code artifacts. 
This module defines the notion of Persistent Identifier (PID) and provides tools to compute them: ```sh $ swh-identify fork.c kmod.c sched/deadline.c swh:1:cnt:2e391c754ae730bd2d8520c2ab497c403220c6e3 fork.c swh:1:cnt:0277d1216f80ae1adeed84a686ed34c9b2931fc2 kmod.c swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82 sched/deadline.c $ swh-identify --no-filename /usr/src/linux/kernel/ swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab ``` Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/swh.model.egg-info/SOURCES.txt b/swh.model.egg-info/SOURCES.txt index 7ca184a..f7e8ffe 100644 --- a/swh.model.egg-info/SOURCES.txt +++ b/swh.model.egg-info/SOURCES.txt @@ -1,44 +1,44 @@ MANIFEST.in Makefile README.md -requirements-swh.txt requirements.txt setup.py version.txt swh/__init__.py swh.model.egg-info/PKG-INFO swh.model.egg-info/SOURCES.txt swh.model.egg-info/dependency_links.txt swh.model.egg-info/entry_points.txt swh.model.egg-info/requires.txt swh.model.egg-info/top_level.txt swh/model/__init__.py swh/model/cli.py swh/model/exceptions.py swh/model/from_disk.py swh/model/hashutil.py swh/model/hypothesis_strategies.py swh/model/identifiers.py swh/model/merkle.py swh/model/model.py swh/model/toposort.py swh/model/validators.py swh/model/fields/__init__.py swh/model/fields/compound.py swh/model/fields/hashes.py swh/model/fields/simple.py swh/model/tests/__init__.py swh/model/tests/generate_testdata_from_disk.py swh/model/tests/test_cli.py swh/model/tests/test_from_disk.py swh/model/tests/test_hashutil.py swh/model/tests/test_hypothesis_strategies.py swh/model/tests/test_identifiers.py swh/model/tests/test_merkle.py +swh/model/tests/test_model.py 
swh/model/tests/test_toposort.py swh/model/tests/test_validators.py swh/model/tests/data/dir-folders/sample-folder.tgz swh/model/tests/fields/__init__.py swh/model/tests/fields/test_compound.py swh/model/tests/fields/test_hashes.py swh/model/tests/fields/test_simple.py \ No newline at end of file diff --git a/swh.model.egg-info/entry_points.txt b/swh.model.egg-info/entry_points.txt index 9c74f36..03eb111 100644 --- a/swh.model.egg-info/entry_points.txt +++ b/swh.model.egg-info/entry_points.txt @@ -1,4 +1,6 @@ [console_scripts] swh-identify=swh.model.cli:identify + [swh.cli.subcommands] + identify=swh.model.cli:identify \ No newline at end of file diff --git a/swh.model.egg-info/requires.txt b/swh.model.egg-info/requires.txt index 674d225..af8f5be 100644 --- a/swh.model.egg-info/requires.txt +++ b/swh.model.egg-info/requires.txt @@ -1,7 +1,8 @@ vcversioner Click attrs hypothesis +python-dateutil [testing] pytest diff --git a/swh/model/cli.py b/swh/model/cli.py index 82af76f..8355629 100644 --- a/swh/model/cli.py +++ b/swh/model/cli.py @@ -1,125 +1,128 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click import os import sys from functools import partial from swh.model import identifiers as pids from swh.model.exceptions import ValidationError from swh.model.from_disk import Content, Directory +CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help']) + + class PidParamType(click.ParamType): name = 'persistent identifier' def convert(self, value, param, ctx): try: pids.parse_persistent_identifier(value) return value # return as string, as we need just that except ValidationError as e: self.fail('%s is not a valid PID. %s.' 
% (value, e), param, ctx) def pid_of_file(path): object = Content.from_file(path=path).get_data() return pids.persistent_identifier(pids.CONTENT, object) def pid_of_dir(path): object = Directory.from_disk(path=path).get_data() return pids.persistent_identifier(pids.DIRECTORY, object) def identify_object(obj_type, follow_symlinks, obj): if obj_type == 'auto': if os.path.isfile(obj): obj_type = 'content' elif os.path.isdir(obj): obj_type = 'directory' else: # shouldn't happen, due to path validation raise click.BadParameter('%s is neither a file nor a directory' % obj) path = obj if follow_symlinks and os.path.islink(obj): path = os.path.realpath(obj) pid = None if obj_type == 'content': pid = pid_of_file(path) elif obj_type == 'directory': pid = pid_of_dir(path) else: # shouldn't happen, due to option validation raise click.BadParameter('invalid object type: ' + obj_type) # note: we return original obj instead of path here, to preserve user-given # file name in output return (obj, pid) -@click.command() +@click.command(context_settings=CONTEXT_SETTINGS) @click.option('--dereference/--no-dereference', 'follow_symlinks', default=True, help='follow (or not) symlinks for OBJECTS passed as arguments ' + '(default: follow)') @click.option('--filename/--no-filename', 'show_filename', default=True, help='show/hide file name (default: show)') @click.option('--type', '-t', 'obj_type', default='auto', type=click.Choice(['auto', 'content', 'directory']), help='type of object to identify (default: auto)') @click.option('--verify', '-v', metavar='PID', type=PidParamType(), help='reference identifier to be compared with computed one') -@click.argument('objects', nargs=-1, +@click.argument('objects', nargs=-1, required=True, type=click.Path(exists=True, readable=True, allow_dash=True, path_type=bytes)) def identify(obj_type, verify, show_filename, follow_symlinks, objects): """Compute the Software Heritage persistent identifier (PID) for the given source code object(s). 
For more details about Software Heritage PIDs see: \b https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html \b Examples: \b - $ swh-identify fork.c kmod.c sched/deadline.c + $ swh identify fork.c kmod.c sched/deadline.c swh:1:cnt:2e391c754ae730bd2d8520c2ab497c403220c6e3 fork.c swh:1:cnt:0277d1216f80ae1adeed84a686ed34c9b2931fc2 kmod.c swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82 sched/deadline.c \b - $ swh-identify --no-filename /usr/src/linux/kernel/ + $ swh identify --no-filename /usr/src/linux/kernel/ swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab """ if verify and len(objects) != 1: raise click.BadParameter('verification requires a single object') results = map(partial(identify_object, obj_type, follow_symlinks), objects) if verify: pid = next(results)[1] if verify == pid: click.echo('PID match: %s' % pid) sys.exit(0) else: click.echo('PID mismatch: %s != %s' % (verify, pid)) sys.exit(1) else: for (obj, pid) in results: msg = pid if show_filename: msg = '%s\t%s' % (pid, os.fsdecode(obj)) click.echo(msg) if __name__ == '__main__': identify() diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py index 3e006c8..1b99957 100644 --- a/swh/model/hypothesis_strategies.py +++ b/swh/model/hypothesis_strategies.py @@ -1,222 +1,228 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime from hypothesis.strategies import ( binary, builds, characters, composite, dictionaries, from_regex, integers, just, lists, none, one_of, sampled_from, text, tuples, ) from .from_disk import DentryPerms from .model import ( Person, Timestamp, TimestampWithTimezone, Origin, OriginVisit, Snapshot, SnapshotBranch, TargetType, Release, Revision, Directory, DirectoryEntry, Content ) from .identifiers import snapshot_identifier, 
identifier_to_bytes +pgsql_alphabet = characters( + blacklist_categories=('Cs', ), + blacklist_characters=['\u0000']) # postgresql does not like these + + +def pgsql_text(): + return text(alphabet=pgsql_alphabet) + + def sha1_git(): return binary(min_size=20, max_size=20) def sha1(): return binary(min_size=20, max_size=20) @composite def urls(draw): protocol = draw(sampled_from(['git', 'http', 'https', 'deb'])) domain = draw(from_regex(r'\A([a-z]([a-z0-9-]*)\.){1,3}[a-z0-9]+\Z')) return '%s://%s' % (protocol, domain) def persons(): return builds(Person) def timestamps(): max_seconds = datetime.datetime.max.replace( tzinfo=datetime.timezone.utc).timestamp() min_seconds = datetime.datetime.min.replace( tzinfo=datetime.timezone.utc).timestamp() return builds( Timestamp, seconds=integers(min_seconds, max_seconds), microseconds=integers(0, 1000000)) def timestamps_with_timezone(): return builds( TimestampWithTimezone, timestamp=timestamps(), offset=integers(min_value=-14*60, max_value=14*60)) def origins(): return builds( Origin, type=sampled_from(['git', 'hg', 'svn', 'pypi', 'deb']), url=urls()) def origin_visits(): return builds( OriginVisit, visit=integers(0, 1000), origin=origins()) @composite def releases(draw): (date, author) = draw(one_of( tuples(none(), none()), tuples(timestamps_with_timezone(), persons()))) rel = draw(builds( Release, id=sha1_git(), author=none(), date=none(), target=sha1_git())) rel.date = date rel.author = author return rel def revision_metadata(): - alphabet = characters( - blacklist_categories=('Cs', ), - blacklist_characters=['\u0000']) # postgresql does not like these - return dictionaries(text(alphabet=alphabet), text(alphabet=alphabet)) + return dictionaries(pgsql_text(), pgsql_text()) def revisions(): return builds( Revision, id=sha1_git(), date=timestamps_with_timezone(), committer_date=timestamps_with_timezone(), parents=lists(sha1_git()), directory=sha1_git(), metadata=one_of(none(), revision_metadata())) # TODO: 
metadata['extra_headers'] can have binary keys and values def directory_entries(): return builds( DirectoryEntry, target=sha1_git(), perms=sampled_from([perm.value for perm in DentryPerms])) def directories(): return builds( Directory, id=sha1_git(), entries=lists(directory_entries())) @composite def contents(draw): (status, data, reason) = draw(one_of( tuples(just('visible'), binary(), none()), - tuples(just('absent'), none(), text()), + tuples(just('absent'), none(), pgsql_text()), tuples(just('hidden'), none(), none()), )) return draw(builds( Content, length=integers(min_value=0, max_value=2**63-1), sha1=sha1(), sha1_git=sha1_git(), sha256=binary(min_size=32, max_size=32), blake2s256=binary(min_size=32, max_size=32), status=just(status), data=just(data), reason=just(reason), )) def branch_names(): - return binary() + return binary(min_size=1) def branch_targets_object(): return builds( SnapshotBranch, target=sha1_git(), target_type=sampled_from([ TargetType.CONTENT, TargetType.DIRECTORY, TargetType.REVISION, TargetType.RELEASE, TargetType.SNAPSHOT])) def branch_targets_alias(): return builds( SnapshotBranch, target_type=just(TargetType.ALIAS)) def branch_targets(*, only_objects=False): if only_objects: return branch_targets_object() else: return one_of(branch_targets_alias(), branch_targets_object()) @composite def snapshots(draw, *, min_size=0, max_size=100, only_objects=False): branches = draw(dictionaries( keys=branch_names(), values=branch_targets(only_objects=only_objects), min_size=min_size, max_size=max_size, )) if not only_objects: # Make sure aliases point to actual branches unresolved_aliases = { target.target for target in branches.values() if (target and target.target_type == 'alias' and target.target not in branches) } for alias in unresolved_aliases: branches[alias] = draw(branch_targets(only_objects=True)) while True: try: id_ = snapshot_identifier({ 'branches': { name: branch.to_dict() for (name, branch) in branches.items()}}) except ValueError 
as e: for (source, target) in e.args[1]: branches[source] = draw(branch_targets(only_objects=True)) else: break return Snapshot( id=identifier_to_bytes(id_), branches=branches) def objects(): return one_of( origins().map(lambda x: ('origin', x)), origin_visits().map(lambda x: ('origin_visit', x)), snapshots().map(lambda x: ('snapshot', x)), releases().map(lambda x: ('release', x)), revisions().map(lambda x: ('revision', x)), directories().map(lambda x: ('directory', x)), contents().map(lambda x: ('content', x)), ) def object_dicts(): return objects().map(lambda x: (x[0], x[1].to_dict())) diff --git a/swh/model/model.py b/swh/model/model.py index 036879d..6e3fd0e 100644 --- a/swh/model/model.py +++ b/swh/model/model.py @@ -1,247 +1,355 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime from enum import Enum from typing import List, Optional, Dict import attr +import dateutil.parser +from .identifiers import normalize_timestamp # TODO: Limit this to 20 bytes Sha1Git = bytes +class BaseModel: + """Base class for SWH model classes. 
+ + Provides serialization/deserialization to/from Python dictionaries, + that are suitable for JSON/msgpack-like formats.""" + + def to_dict(self): + """Wrapper of `attr.asdict` that can be overridden by subclasses + that have special handling of some of the fields.""" + return attr.asdict(self) + + @classmethod + def from_dict(cls, d): + """Takes a dictionary representing a tree of SWH objects, and + recursively builds the corresponding objects.""" + return cls(**d) + + @attr.s -class Person: +class Person(BaseModel): + """Represents the author/committer of a revision or release.""" name = attr.ib(type=bytes) email = attr.ib(type=bytes) fullname = attr.ib(type=bytes) @attr.s -class Timestamp: +class Timestamp(BaseModel): + """Represents a naive timestamp from a VCS.""" seconds = attr.ib(type=int) microseconds = attr.ib(type=int) @seconds.validator def check_seconds(self, attribute, value): """Check that seconds fit in a 64-bits signed integer.""" if not (-2**63 <= value < 2**63): raise ValueError('Seconds must be a signed 64-bits integer.') @microseconds.validator def check_microseconds(self, attribute, value): """Checks that microseconds are positive and < 1000000.""" if not (0 <= value < 10**6): raise ValueError('Microseconds must be in [0, 1000000[.') @attr.s -class TimestampWithTimezone: +class TimestampWithTimezone(BaseModel): + """Represents a TZ-aware timestamp from a VCS.""" timestamp = attr.ib(type=Timestamp) offset = attr.ib(type=int) negative_utc = attr.ib(type=bool) - def to_dict(self): - return attr.asdict(self) - @offset.validator def check_offset(self, attribute, value): + """Checks the offset is a 16-bits signed integer (in theory, it + should always be between -14 and +14 hours).""" if not (-2**15 <= value < 2**15): # max 14 hours offset in theory, but you never know what # you'll find in the wild...
raise ValueError('offset too large: %d minutes' % value) + @classmethod + def from_dict(cls, d): + """Builds a TimestampWithTimezone from any of the formats + accepted by :py:func:`swh.model.identifiers.normalize_timestamp`.""" + d = normalize_timestamp(d) + return cls( + timestamp=Timestamp.from_dict(d['timestamp']), + offset=d['offset'], + negative_utc=d['negative_utc']) + @attr.s -class Origin: +class Origin(BaseModel): + """Represents a software source: a VCS and a URL.""" type = attr.ib(type=str) url = attr.ib(type=str) - def to_dict(self): - return attr.asdict(self) - @attr.s -class OriginVisit: +class OriginVisit(BaseModel): + """Represents a visit of an origin at a given point in time, by a + SWH loader.""" origin = attr.ib(type=Origin) date = attr.ib(type=datetime.datetime) - visit = attr.ib(type=Optional[int]) + visit = attr.ib(type=Optional[int], + validator=attr.validators.optional([])) """Should not be set before calling 'origin_visit_add()'.""" def to_dict(self): - ov = attr.asdict(self) - ov['origin'] = self.origin.to_dict() + """Serializes the date as a string and omits the visit id if it is + `None`.""" + ov = super().to_dict() ov['date'] = str(self.date) - if not ov['visit']: + if ov['visit'] is None: del ov['visit'] return ov + @classmethod + def from_dict(cls, d): + """Parses the date from a string, and accepts missing visit ids.""" + return cls( + origin=Origin.from_dict(d['origin']), + date=dateutil.parser.parse(d['date']), + visit=d.get('visit')) + class TargetType(Enum): + """The type of content pointed to by a snapshot branch. Usually a + revision or an alias.""" CONTENT = 'content' DIRECTORY = 'directory' REVISION = 'revision' RELEASE = 'release' SNAPSHOT = 'snapshot' ALIAS = 'alias' class ObjectType(Enum): + """The type of content pointed to by a release.
Usually a revision""" CONTENT = 'content' DIRECTORY = 'directory' REVISION = 'revision' RELEASE = 'release' SNAPSHOT = 'snapshot' @attr.s -class SnapshotBranch: +class SnapshotBranch(BaseModel): + """Represents one of the branches of a snapshot.""" target = attr.ib(type=bytes) target_type = attr.ib(type=TargetType) @target.validator def check_target(self, attribute, value): + """If the target type is not an alias, checks the target is a + valid sha1_git.""" if self.target_type != TargetType.ALIAS: if len(value) != 20: raise ValueError('Wrong length for bytes identifier: %d' % len(value)) def to_dict(self): branch = attr.asdict(self) branch['target_type'] = branch['target_type'].value return branch + @classmethod + def from_dict(cls, d): + return cls( + target=d['target'], + target_type=TargetType(d['target_type'])) + @attr.s -class Snapshot: +class Snapshot(BaseModel): + """Represents the full state of an origin at a given point in time.""" id = attr.ib(type=Sha1Git) branches = attr.ib(type=Dict[bytes, Optional[SnapshotBranch]]) def to_dict(self): return { 'id': self.id, 'branches': { name: branch.to_dict() for (name, branch) in self.branches.items() } } + @classmethod + def from_dict(cls, d): + return cls( + id=d['id'], + branches={ + name: SnapshotBranch.from_dict(branch) + for (name, branch) in d['branches'].items() + }) + @attr.s -class Release: +class Release(BaseModel): id = attr.ib(type=Sha1Git) name = attr.ib(type=bytes) message = attr.ib(type=bytes) - date = attr.ib(type=Optional[TimestampWithTimezone]) - author = attr.ib(type=Optional[Person]) - target = attr.ib(type=Optional[Sha1Git]) + target = attr.ib(type=Optional[Sha1Git], + validator=attr.validators.optional([])) target_type = attr.ib(type=ObjectType) synthetic = attr.ib(type=bool) + author = attr.ib(type=Optional[Person], + default=None, + validator=attr.validators.optional([])) + date = attr.ib(type=Optional[TimestampWithTimezone], + default=None, + validator=attr.validators.optional([])) + +
@author.validator + def check_author(self, attribute, value): + """If the author is `None`, checks the date is `None` too.""" + if self.author is None and self.date is not None: + raise ValueError('release date must be None if author is None.') def to_dict(self): rel = attr.asdict(self) rel['date'] = self.date.to_dict() if self.date is not None else None rel['target_type'] = rel['target_type'].value return rel - @author.validator - def check_author(self, attribute, value): - if self.author is None and self.date is not None: - raise ValueError('release date must be None if date is None.') + @classmethod + def from_dict(cls, d): + d = d.copy() + if d.get('author'): + d['author'] = Person.from_dict(d['author']) + if d.get('date'): + d['date'] = TimestampWithTimezone.from_dict(d['date']) + return cls( + target_type=ObjectType(d.pop('target_type')), + **d) class RevisionType(Enum): GIT = 'git' TAR = 'tar' DSC = 'dsc' SUBVERSION = 'svn' MERCURIAL = 'hg' @attr.s -class Revision: +class Revision(BaseModel): id = attr.ib(type=Sha1Git) message = attr.ib(type=bytes) author = attr.ib(type=Person) committer = attr.ib(type=Person) date = attr.ib(type=TimestampWithTimezone) committer_date = attr.ib(type=TimestampWithTimezone) - parents = attr.ib(type=List[Sha1Git]) type = attr.ib(type=RevisionType) directory = attr.ib(type=Sha1Git) - metadata = attr.ib(type=Optional[Dict[str, object]]) synthetic = attr.ib(type=bool) + metadata = attr.ib(type=Optional[Dict[str, object]], + default=None, + validator=attr.validators.optional([])) + parents = attr.ib(type=List[Sha1Git], + default=attr.Factory(list)) def to_dict(self): rev = attr.asdict(self) rev['date'] = self.date.to_dict() rev['committer_date'] = self.committer_date.to_dict() rev['type'] = rev['type'].value return rev + @classmethod + def from_dict(cls, d): + return cls( + id=d['id'], + message=d['message'], + author=Person.from_dict(d['author']), + committer=Person.from_dict(d['committer']), + 
date=TimestampWithTimezone.from_dict(d['date']), + committer_date=TimestampWithTimezone.from_dict( + d['committer_date']), + type=RevisionType(d['type']), + directory=d['directory'], + synthetic=d['synthetic'], + metadata=d['metadata'], + parents=d['parents']) + @attr.s -class DirectoryEntry: +class DirectoryEntry(BaseModel): name = attr.ib(type=bytes) type = attr.ib(type=str, validator=attr.validators.in_(['file', 'dir', 'rev'])) target = attr.ib(type=Sha1Git) perms = attr.ib(type=int) """Usually one of the values of `swh.model.from_disk.DentryPerms`.""" - def to_dict(self): - return attr.asdict(self) - @attr.s -class Directory: +class Directory(BaseModel): id = attr.ib(type=Sha1Git) entries = attr.ib(type=List[DirectoryEntry]) def to_dict(self): dir_ = attr.asdict(self) dir_['entries'] = [entry.to_dict() for entry in self.entries] return dir_ + @classmethod + def from_dict(cls, d): + return cls( + id=d['id'], + entries=[DirectoryEntry.from_dict(entry) + for entry in d['entries']]) + @attr.s -class Content: +class Content(BaseModel): sha1 = attr.ib(type=bytes) sha1_git = attr.ib(type=Sha1Git) sha256 = attr.ib(type=bytes) blake2s256 = attr.ib(type=bytes) - data = attr.ib(type=bytes) length = attr.ib(type=int) status = attr.ib( type=str, validator=attr.validators.in_(['visible', 'absent', 'hidden'])) - reason = attr.ib(type=Optional[str]) + reason = attr.ib(type=Optional[str], + default=None, + validator=attr.validators.optional([])) + data = attr.ib(type=Optional[bytes], + default=None, + validator=attr.validators.optional([])) @length.validator def check_length(self, attribute, value): """Checks the length is positive.""" if value < 0: raise ValueError('Length must be positive.') @reason.validator def check_reason(self, attribute, value): """Checks the reason is full iff status != absent.""" assert self.reason == value if self.status == 'absent' and value is None: raise ValueError('Must provide a reason if content is absent.') elif self.status != 'absent' and 
value is not None: raise ValueError( 'Must not provide a reason if content is not absent.') def to_dict(self): content = attr.asdict(self) if content['data'] is None: del content['data'] if content['reason'] is None: del content['reason'] return content diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py new file mode 100644 index 0000000..220ba32 --- /dev/null +++ b/swh/model/tests/test_model.py @@ -0,0 +1,26 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import copy + +from hypothesis import given + +from swh.model.hypothesis_strategies import objects + + +@given(objects()) +def test_todict_inverse_fromdict(objtype_and_obj): + (obj_type, obj) = objtype_and_obj + obj_as_dict = obj.to_dict() + obj_as_dict_copy = copy.deepcopy(obj_as_dict) + + # Check the composition of to_dict and from_dict is the identity + assert obj == type(obj).from_dict(obj_as_dict) + + # Check from_dict() does not change the input dict + assert obj_as_dict == obj_as_dict_copy + + # Check the composition of from_dict and to_dict is the identity + assert obj_as_dict == type(obj).from_dict(obj_as_dict).to_dict() diff --git a/version.txt b/version.txt index 871d5f5..ba49f49 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.35-0-gfee3a41 \ No newline at end of file +v0.0.38-0-gb3250d2 \ No newline at end of file