diff --git a/PKG-INFO b/PKG-INFO index c286f2ca..ea28bc5b 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,35 +1,35 @@ Metadata-Version: 2.1 Name: swh.deposit -Version: 0.0.68 +Version: 0.0.69 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN +Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate -Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Description: # swh-deposit This is [Software Heritage](https://www.softwareheritage.org)'s [SWORD 2.0](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) Server - implementation. + implementation, as well as a simple client to upload deposits on the server. **S.W.O.R.D** (**S**imple **W**eb-Service **O**ffering **R**epository **D**eposit) is an interoperability standard for digital file deposit. This implementation will permit interaction between a client (a repository) and a server (SWH repository) to permit deposits of software source code archives and associated metadata. The documentation is at ./docs/README-specification.md Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: server Provides-Extra: testing diff --git a/README.md b/README.md index b146a9d1..a1bd0265 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,14 @@ # swh-deposit This is [Software Heritage](https://www.softwareheritage.org)'s [SWORD 2.0](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) Server -implementation. 
+implementation, as well as a simple client to upload deposits on the server. **S.W.O.R.D** (**S**imple **W**eb-Service **O**ffering **R**epository **D**eposit) is an interoperability standard for digital file deposit. This implementation will permit interaction between a client (a repository) and a server (SWH repository) to permit deposits of software source code archives and associated metadata. The documentation is at ./docs/README-specification.md diff --git a/requirements-swh-server.txt b/requirements-swh-server.txt index 1b77634b..02920660 100644 --- a/requirements-swh-server.txt +++ b/requirements-swh-server.txt @@ -1,4 +1,5 @@ +swh.core[http] swh.loader.tar >= 0.0.39 swh.loader.core >= 0.0.32 swh.scheduler >= 0.0.39 swh.model >= 0.0.26 diff --git a/requirements-swh.txt b/requirements-swh.txt index 5662ee37..32aa75ec 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1 +1 @@ -swh.core >= 0.0.36 +swh.core >= 0.0.60 diff --git a/requirements.txt b/requirements.txt index 68bb2e26..5c72bcaa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ vcversioner click xmltodict iso8601 +requests diff --git a/setup.py b/setup.py index 2b1163be..a4921dc6 100755 --- a/setup.py +++ b/setup.py @@ -1,71 +1,74 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from setuptools import setup, find_packages from os import path from io import open here = path.abspath(path.dirname(__file__)) # Get the long description from the README file with open(path.join(here, 'README.md'), encoding='utf-8') as f: long_description = f.read() -def parse_requirements(name=None): - if name: - reqf = 'requirements-%s.txt' % name - else: - reqf = 'requirements.txt' - +def parse_requirements(*names): requirements = [] - if not path.exists(reqf): - 
return requirements + for name in names: + if name: + reqf = 'requirements-%s.txt' % name + else: + reqf = 'requirements.txt' + + if not path.exists(reqf): + return requirements - with open(reqf) as f: - for line in f.readlines(): - line = line.strip() - if not line or line.startswith('#'): - continue - requirements.append(line) + with open(reqf) as f: + for line in f.readlines(): + line = line.strip() + if not line or line.startswith('#'): + continue + requirements.append(line) return requirements setup( name='swh.deposit', description='Software Heritage Deposit Server', long_description=long_description, long_description_content_type='text/markdown', author='Software Heritage developers', author_email='swh-devel@inria.fr', url='https://forge.softwareheritage.org/source/swh-deposit/', packages=find_packages(), - install_requires=parse_requirements() + parse_requirements('swh'), + install_requires=parse_requirements(None, 'swh'), tests_require=parse_requirements('test'), setup_requires=['vcversioner'], - extras_require={'testing': parse_requirements('test'), - 'server': (parse_requirements('server') + - parse_requirements('swh-server'))}, + extras_require={ + 'testing': parse_requirements('test', 'server', 'swh-server'), + 'server': parse_requirements('server', 'swh-server')}, vcversioner={}, include_package_data=True, entry_points=''' [console_scripts] swh-deposit=swh.deposit.cli:main + [swh.cli.subcommands] + deposit=swh.deposit.cli:deposit ''', classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 5 - Production/Stable", ], project_urls={ 'Bug Reports': 'https://forge.softwareheritage.org/maniphest', 'Funding': 'https://www.softwareheritage.org/donate', 'Source': 'https://forge.softwareheritage.org/source/swh-deposit', }, ) diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO 
index c286f2ca..ea28bc5b 100644 --- a/swh.deposit.egg-info/PKG-INFO +++ b/swh.deposit.egg-info/PKG-INFO @@ -1,35 +1,35 @@ Metadata-Version: 2.1 Name: swh.deposit -Version: 0.0.68 +Version: 0.0.69 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN +Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate -Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Description: # swh-deposit This is [Software Heritage](https://www.softwareheritage.org)'s [SWORD 2.0](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) Server - implementation. + implementation, as well as a simple client to upload deposits on the server. **S.W.O.R.D** (**S**imple **W**eb-Service **O**ffering **R**epository **D**eposit) is an interoperability standard for digital file deposit. This implementation will permit interaction between a client (a repository) and a server (SWH repository) to permit deposits of software source code archives and associated metadata. 
The documentation is at ./docs/README-specification.md Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: server Provides-Extra: testing diff --git a/swh.deposit.egg-info/SOURCES.txt b/swh.deposit.egg-info/SOURCES.txt index f69f6e16..6f177247 100644 --- a/swh.deposit.egg-info/SOURCES.txt +++ b/swh.deposit.egg-info/SOURCES.txt @@ -1,122 +1,123 @@ MANIFEST.in Makefile README.md requirements-server.txt requirements-swh-server.txt requirements-swh.txt requirements-test.txt requirements.txt setup.py version.txt swh/__init__.py swh.deposit.egg-info/PKG-INFO swh.deposit.egg-info/SOURCES.txt swh.deposit.egg-info/dependency_links.txt swh.deposit.egg-info/entry_points.txt swh.deposit.egg-info/requires.txt swh.deposit.egg-info/top_level.txt swh/deposit/__init__.py swh/deposit/apps.py swh/deposit/auth.py swh/deposit/config.py swh/deposit/errors.py swh/deposit/manage.py swh/deposit/models.py swh/deposit/parsers.py swh/deposit/signals.py swh/deposit/urls.py swh/deposit/utils.py swh/deposit/wsgi.py swh/deposit/api/__init__.py swh/deposit/api/common.py swh/deposit/api/converters.py swh/deposit/api/deposit.py swh/deposit/api/deposit_content.py swh/deposit/api/deposit_status.py swh/deposit/api/deposit_update.py swh/deposit/api/service_document.py swh/deposit/api/urls.py swh/deposit/api/private/__init__.py swh/deposit/api/private/deposit_check.py swh/deposit/api/private/deposit_list.py swh/deposit/api/private/deposit_read.py swh/deposit/api/private/deposit_update_status.py swh/deposit/api/private/urls.py swh/deposit/cli/__init__.py swh/deposit/cli/admin.py -swh/deposit/cli/deposit.py +swh/deposit/cli/client.py swh/deposit/client/__init__.py swh/deposit/fixtures/__init__.py 
swh/deposit/fixtures/deposit_data.yaml swh/deposit/loader/__init__.py swh/deposit/loader/checker.py swh/deposit/loader/loader.py swh/deposit/loader/tasks.py swh/deposit/migrations/0001_initial.py swh/deposit/migrations/0002_depositrequest_archive.py swh/deposit/migrations/0003_temporaryarchive.py swh/deposit/migrations/0004_delete_temporaryarchive.py swh/deposit/migrations/0005_auto_20171019_1436.py swh/deposit/migrations/0006_depositclient_url.py swh/deposit/migrations/0007_auto_20171129_1609.py swh/deposit/migrations/0008_auto_20171130_1513.py swh/deposit/migrations/0009_deposit_parent.py swh/deposit/migrations/0010_auto_20180110_0953.py swh/deposit/migrations/0011_auto_20180115_1510.py swh/deposit/migrations/0012_deposit_status_detail.py swh/deposit/migrations/0013_depositrequest_raw_metadata.py swh/deposit/migrations/0014_auto_20180720_1221.py swh/deposit/migrations/0015_depositrequest_typemigration.py +swh/deposit/migrations/0016_auto_20190507_1408.py swh/deposit/migrations/__init__.py swh/deposit/settings/__init__.py swh/deposit/settings/common.py swh/deposit/settings/development.py swh/deposit/settings/production.py swh/deposit/settings/testing.py swh/deposit/static/robots.txt swh/deposit/static/css/bootstrap-responsive.min.css swh/deposit/static/css/style.css swh/deposit/static/img/arrow-up-small.png swh/deposit/static/img/swh-logo-deposit.png swh/deposit/static/img/swh-logo-deposit.svg swh/deposit/static/img/icons/swh-logo-32x32.png swh/deposit/static/img/icons/swh-logo-deposit-180x180.png swh/deposit/static/img/icons/swh-logo-deposit-192x192.png swh/deposit/static/img/icons/swh-logo-deposit-270x270.png swh/deposit/templates/__init__.py swh/deposit/templates/homepage.html swh/deposit/templates/layout.html swh/deposit/templates/deposit/__init__.py swh/deposit/templates/deposit/content.xml swh/deposit/templates/deposit/deposit_receipt.xml swh/deposit/templates/deposit/error.xml swh/deposit/templates/deposit/service_document.xml 
swh/deposit/templates/deposit/status.xml swh/deposit/templates/rest_framework/api.html swh/deposit/tests/__init__.py swh/deposit/tests/common.py swh/deposit/tests/test_utils.py swh/deposit/tests/api/__init__.py swh/deposit/tests/api/test_common.py swh/deposit/tests/api/test_converters.py swh/deposit/tests/api/test_deposit.py swh/deposit/tests/api/test_deposit_atom.py swh/deposit/tests/api/test_deposit_binary.py swh/deposit/tests/api/test_deposit_check.py swh/deposit/tests/api/test_deposit_delete.py swh/deposit/tests/api/test_deposit_list.py swh/deposit/tests/api/test_deposit_multipart.py swh/deposit/tests/api/test_deposit_read_archive.py swh/deposit/tests/api/test_deposit_read_metadata.py swh/deposit/tests/api/test_deposit_status.py swh/deposit/tests/api/test_deposit_update.py swh/deposit/tests/api/test_deposit_update_status.py swh/deposit/tests/api/test_parser.py swh/deposit/tests/api/test_service_document.py swh/deposit/tests/loader/__init__.py swh/deposit/tests/loader/common.py swh/deposit/tests/loader/conftest.py swh/deposit/tests/loader/test_checker.py swh/deposit/tests/loader/test_client.py swh/deposit/tests/loader/test_loader.py swh/deposit/tests/loader/test_tasks.py \ No newline at end of file diff --git a/swh.deposit.egg-info/entry_points.txt b/swh.deposit.egg-info/entry_points.txt index e0dcc7a1..4304c868 100644 --- a/swh.deposit.egg-info/entry_points.txt +++ b/swh.deposit.egg-info/entry_points.txt @@ -1,4 +1,6 @@ [console_scripts] swh-deposit=swh.deposit.cli:main + [swh.cli.subcommands] + deposit=swh.deposit.cli:deposit \ No newline at end of file diff --git a/swh.deposit.egg-info/requires.txt b/swh.deposit.egg-info/requires.txt index dc10ec30..95fe0b9d 100644 --- a/swh.deposit.egg-info/requires.txt +++ b/swh.deposit.egg-info/requires.txt @@ -1,18 +1,27 @@ vcversioner click xmltodict iso8601 -swh.core>=0.0.36 +requests +swh.core>=0.0.60 [server] Django<2.0 djangorestframework +swh.core[http] swh.loader.tar>=0.0.39 swh.loader.core>=0.0.32 
swh.scheduler>=0.0.39 swh.model>=0.0.26 [testing] pytest<4 pytest-django swh.scheduler[testing] +Django<2.0 +djangorestframework +swh.core[http] +swh.loader.tar>=0.0.39 +swh.loader.core>=0.0.32 +swh.scheduler>=0.0.39 +swh.model>=0.0.26 diff --git a/swh/deposit/cli/__init__.py b/swh/deposit/cli/__init__.py index 5a209626..8d994985 100644 --- a/swh/deposit/cli/__init__.py +++ b/swh/deposit/cli/__init__.py @@ -1,37 +1,37 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click import logging -logger = logging.getLogger(__name__) - +from swh.core.cli import CONTEXT_SETTINGS -CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help']) +logger = logging.getLogger(__name__) @click.group(context_settings=CONTEXT_SETTINGS) -@click.option('--log-level', '-l', default='INFO', - type=click.Choice(logging._nameToLevel.keys()), - help="Log level (default to INFO)") @click.pass_context -def cli(ctx, log_level): - logger.setLevel(log_level) +def deposit(ctx): + """Deposit main command + """ + logger.debug('deposit') ctx.ensure_object(dict) def main(): logging.basicConfig() - from . import deposit # noqa - try: - from . import admin # noqa - except ImportError: # server part is optional - pass - - return cli(auto_envvar_prefix='SWH_DEPOSIT') + return deposit(auto_envvar_prefix='SWH_DEPOSIT') + +# These import statements MUST be executed after defining the 'deposit' group +# since the subcommands in these are defined using this 'deposit' group. +from . import client # noqa +try: + from . 
import admin # noqa +except ImportError: # server part is optional + logger.debug('admin subcommand not loaded') if __name__ == '__main__': main() diff --git a/swh/deposit/cli/admin.py b/swh/deposit/cli/admin.py index 9b918335..fb3017f7 100644 --- a/swh/deposit/cli/admin.py +++ b/swh/deposit/cli/admin.py @@ -1,178 +1,254 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click from swh.deposit.config import setup_django_for -from swh.deposit.cli import cli +from swh.deposit.cli import deposit -@cli.group('admin') +@deposit.group('admin') @click.option('--config-file', '-C', default=None, type=click.Path(exists=True, dir_okay=False,), help="Optional extra configuration file.") @click.option('--platform', default='development', type=click.Choice(['development', 'production']), help='development or production platform') @click.pass_context def admin(ctx, config_file, platform): """Server administration tasks (manipulate user or collections)""" # configuration happens here setup_django_for(platform, config_file=config_file) @admin.group('user') @click.pass_context def user(ctx): """Manipulate user.""" # configuration happens here pass def _create_collection(name): """Create the collection with name if it does not exist. Args: name (str): collection's name Returns: collection (DepositCollection): the existing collection object (created or not) """ # to avoid loading too early django namespaces from swh.deposit.models import DepositCollection try: collection = DepositCollection.objects.get(name=name) click.echo('Collection %s exists, nothing to do.' 
% name) except DepositCollection.DoesNotExist: click.echo('Create new collection %s' % name) collection = DepositCollection.objects.create(name=name) click.echo('Collection %s created' % name) return collection @user.command('create') @click.option('--username', required=True, help="User's name") @click.option('--password', required=True, help="Desired user's password (plain).") @click.option('--firstname', default='', help="User's first name") @click.option('--lastname', default='', help="User's last name") @click.option('--email', default='', help="User's email") @click.option('--collection', help="User's collection") @click.option('--provider-url', default='', help="Provider URL") @click.option('--domain', help="The domain") @click.pass_context def user_create(ctx, username, password, firstname, lastname, email, collection, provider_url, domain): """Create a user with some needed information (password, collection) If the collection does not exist, the collection is then created alongside. The password is stored encrypted using django's utilies. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositClient # If collection is not provided, fallback to username if not collection: collection = username click.echo('collection: %s' % collection) # create the collection if it does not exist collection = _create_collection(collection) # user create/update try: user = DepositClient.objects.get(username=username) click.echo('User %s exists, updating information.' 
% user) user.set_password(password) except DepositClient.DoesNotExist: click.echo('Create new user %s' % username) user = DepositClient.objects.create_user( username=username, password=password) user.collections = [collection.id] user.first_name = firstname user.last_name = lastname user.email = email user.is_active = True user.provider_url = provider_url user.domain = domain user.save() click.echo('Information registered for user %s' % user) @user.command('list') @click.pass_context def user_list(ctx): """List existing users. This entrypoint is not paginated yet as there is not a lot of entry. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositClient users = DepositClient.objects.all() if not users: output = 'Empty user list' else: output = '\n'.join((user.username for user in users)) click.echo(output) @user.command('exists') @click.argument('username', required=True) @click.pass_context def user_exists(ctx, username): """Check if user exists. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositClient try: DepositClient.objects.get(username=username) click.echo('User %s exists.' % username) ctx.exit(0) except DepositClient.DoesNotExist: - click.echo('User %s does not exists.' % username) + click.echo('User %s does not exist.' % username) ctx.exit(1) @admin.group('collection') @click.pass_context def collection(ctx): """Manipulate collections.""" pass @collection.command('create') @click.option('--name', required=True, help="Collection's name") @click.pass_context def collection_create(ctx, name): _create_collection(name) @collection.command('list') @click.pass_context def collection_list(ctx): """List existing collections. This entrypoint is not paginated yet as there is not a lot of entry. 
""" # to avoid loading too early django namespaces from swh.deposit.models import DepositCollection collections = DepositCollection.objects.all() if not collections: output = 'Empty collection list' else: output = '\n'.join((col.name for col in collections)) click.echo(output) + + +@admin.group('deposit') +@click.pass_context +def deposit(ctx): + """Manipulate deposit.""" + pass + + +@deposit.command('reschedule') +@click.option('--deposit-id', required=True, help="Deposit identifier") +@click.pass_context +def deposit_reschedule(ctx, deposit_id): + """Reschedule the deposit loading + + This will: + + - check the deposit's status to something reasonable (failed or done). That + means that the checks have passed alright but something went wrong during + the loading (failed: loading failed, done: loading ok, still for some + reasons as in bugs, we need to reschedule it) + + - reset the deposit's status to 'verified' (prior to any loading but after + the checks which are fine) and removes the different archives' + identifiers (swh-id, ...) + + - trigger back the loading task through the scheduler + + """ + # to avoid loading too early django namespaces + from datetime import datetime + from swh.deposit.models import Deposit + from swh.deposit.config import ( + DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE, + DEPOSIT_STATUS_VERIFIED, SWHDefaultConfig, + ) + + try: + deposit = Deposit.objects.get(pk=deposit_id) + except Deposit.DoesNotExist: + click.echo('Deposit %s does not exist.' % deposit_id) + ctx.exit(1) + + # Check the deposit is in a reasonable state + accepted_statuses = [ + DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE + ] + if deposit.status == DEPOSIT_STATUS_VERIFIED: + click.echo('Deposit %s\'s status already set for rescheduling.' % ( + deposit_id)) + ctx.exit(0) + + if deposit.status not in accepted_statuses: + click.echo('Deposit %s\'s status be one of %s.' 
% ( + deposit_id, ', '.join(accepted_statuses))) + ctx.exit(1) + + task_id = deposit.load_task_id + if not task_id: + click.echo('Deposit %s cannot be rescheduled. It misses the ' + 'associated task.' % deposit_id) + ctx.exit(1) + + # Reset the deposit's state + deposit.swh_id = None + deposit.swh_id_context = None + deposit.swh_anchor_id = None + deposit.swh_anchor_id_context = None + deposit.status = DEPOSIT_STATUS_VERIFIED + deposit.save() + + # Trigger back the deposit + scheduler = SWHDefaultConfig().scheduler + scheduler.set_status_tasks( + [task_id], status='next_run_not_scheduled', + next_run=datetime.now()) diff --git a/swh/deposit/cli/deposit.py b/swh/deposit/cli/client.py similarity index 96% rename from swh/deposit/cli/deposit.py rename to swh/deposit/cli/client.py index d8db2592..bb06d26a 100644 --- a/swh/deposit/cli/deposit.py +++ b/swh/deposit/cli/client.py @@ -1,353 +1,353 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import logging import tempfile import uuid import click import xmltodict from swh.deposit.client import PublicApiDepositClient -from swh.deposit.cli import cli +from swh.deposit.cli import deposit logger = logging.getLogger(__name__) class InputError(ValueError): """Input script error """ pass def generate_slug(): """Generate a slug (sample purposes). """ return str(uuid.uuid4()) def generate_metadata_file(name, external_id, authors): """Generate a temporary metadata file with the minimum required metadata This generates a xml file in a temporary location and returns the path to that file. This is up to the client of that function to clean up the temporary file. 
Args: name (str): Software's name external_id (str): External identifier (slug) or generated one authors (List[str]): List of author names Returns: Filepath to the metadata generated file """ _, tmpfile = tempfile.mkstemp(prefix='swh.deposit.cli.') # generate a metadata file with the minimum required metadata codemetadata = { 'entry': { '@xmlns': "http://www.w3.org/2005/Atom", '@xmlns:codemeta': "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0", 'codemeta:name': name, 'codemeta:identifier': external_id, 'codemeta:author': [{ 'codemeta:name': author_name } for author_name in authors], }, } logging.debug('Temporary file: %s', tmpfile) logging.debug('Metadata dict to generate as xml: %s', codemetadata) s = xmltodict.unparse(codemetadata, pretty=True) logging.debug('Metadata dict as xml generated: %s', s) with open(tmpfile, 'w') as fp: fp.write(s) return tmpfile def _cleanup_tempfile(config): """Clean up the temporary metadata file generated. Args: config (Dict): A configuration dict with 2 important keys for that routine, 'cleanup_tempfile' (bool) and 'metadata' (path to eventually clean up) """ if config['cleanup_tempfile']: path = config['metadata'] if os.path.exists(path): os.unlink(path) def client_command_parse_input( username, password, archive, metadata, archive_deposit, metadata_deposit, collection, slug, partial, deposit_id, replace, url, status, name, authors): """Parse the client subcommand options and make sure the combination is acceptable*. If not, an InputError exception is raised explaining the issue. By acceptable, we mean: - A multipart deposit (create or update) needs both an existing software archive and an existing metadata file - A binary deposit (create/update) needs an existing software archive - A metadata deposit (create/update) needs an existing metadata file - A deposit update needs a deposit_id to be provided This won't prevent all failure cases though. The remaining errors are already dealt with the underlying api client. 
Raises: InputError explaining the issue Returns: dict with the following keys: 'archive': the software archive to deposit 'username': username 'password': associated password 'metadata': the metadata file to deposit 'collection': the username's associated client 'slug': the slug or external id identifying the deposit to make 'partial': if the deposit is partial or not 'client': instantiated class 'url': deposit's server main entry point 'deposit_type': deposit's type (binary, multipart, metadata) 'deposit_id': optional deposit identifier """ cleanup_tempfile = False try: if status and not deposit_id: raise InputError("Deposit id must be provided for status check") if status and deposit_id: # status is higher priority over deposit archive_deposit = False metadata_deposit = False archive = None metadata = None if archive_deposit and metadata_deposit: # too many flags use, remove redundant ones (-> multipart deposit) archive_deposit = False metadata_deposit = False if archive and not os.path.exists(archive): raise InputError('Software Archive %s must exist!' % archive) if not slug: # generate one as this is mandatory slug = generate_slug() if archive and not metadata: # we need to have the metadata if name and authors: metadata = generate_metadata_file(name, slug, authors) cleanup_tempfile = True else: raise InputError('Either metadata deposit file or (`--name` ' ' and `--author`) fields must be provided') if metadata_deposit: archive = None if archive_deposit: metadata = None if metadata_deposit and not metadata: raise InputError( "Metadata deposit filepath must be provided for metadata " "deposit") if metadata and not os.path.exists(metadata): raise InputError('Software Archive metadata %s must exist!' % ( metadata, )) if not status and not archive and not metadata: raise InputError( 'Please provide an actionable command. 
See --help for more ' 'information.') if replace and not deposit_id: raise InputError( 'To update an existing deposit, you must provide its id') client = PublicApiDepositClient({ 'url': url, 'auth': { 'username': username, 'password': password }, }) if not collection: # retrieve user's collection sd_content = client.service_document() if 'error' in sd_content: raise InputError('Service document retrieval: %s' % ( sd_content['error'], )) collection = sd_content[ 'service']['workspace']['collection']['sword:name'] return { 'archive': archive, 'username': username, 'password': password, 'metadata': metadata, 'cleanup_tempfile': cleanup_tempfile, 'collection': collection, 'slug': slug, 'in_progress': partial, 'client': client, 'url': url, 'deposit_id': deposit_id, 'replace': replace, } except Exception: # to be clean, cleanup prior to raise _cleanup_tempfile({ 'cleanup_tempfile': cleanup_tempfile, 'metadata': metadata }) raise def _subdict(d, keys): 'return a dict from d with only given keys' return {k: v for k, v in d.items() if k in keys} def deposit_status(config, logger): logger.debug('Status deposit') keys = ('collection', 'deposit_id') client = config['client'] return client.deposit_status( **_subdict(config, keys)) def deposit_create(config, logger): """Delegate the actual deposit to the deposit client. """ logger.debug('Create deposit') client = config['client'] keys = ('collection', 'archive', 'metadata', 'slug', 'in_progress') return client.deposit_create( **_subdict(config, keys)) def deposit_update(config, logger): """Delegate the actual deposit to the deposit client. 
@deposit.command()
@click.option('--username', required=True,
              help="(Mandatory) User's name")
@click.option('--password', required=True,
              help="(Mandatory) User's associated password")
@click.option('--archive',
              help='(Optional) Software archive to deposit')
@click.option('--metadata',
              help="(Optional) Path to xml metadata file. If not provided, this will use a file named .metadata.xml")  # noqa
@click.option('--archive-deposit/--no-archive-deposit', default=False,
              help='(Optional) Software archive only deposit')
@click.option('--metadata-deposit/--no-metadata-deposit', default=False,
              help='(Optional) Metadata only deposit')
@click.option('--collection',
              help="(Optional) User's collection. If not provided, this will be fetched.")  # noqa
@click.option('--slug',
              help="""(Optional) External system information identifier. If not provided, it will be generated""")  # noqa
@click.option('--partial/--no-partial', default=False,
              help='(Optional) The deposit will be partial, other deposits will have to take place to finalize it.')  # noqa
@click.option('--deposit-id', default=None,
              help='(Optional) Update an existing partial deposit with its identifier')  # noqa
@click.option('--replace/--no-replace', default=False,
              help='(Optional) Update by replacing existing metadata to a deposit')  # noqa
@click.option('--url', default='https://deposit.softwareheritage.org/1',
              help="(Optional) Deposit server api endpoint. By default, https://deposit.softwareheritage.org/1")  # noqa
@click.option('--status/--no-status', default=False,
              help="(Optional) Deposit's status")
@click.option('--verbose/--no-verbose', default=False,
              help='Verbose mode')
@click.option('--name',
              help='Software name')
@click.option('--author', multiple=True,
              help='Software author(s), this can be repeated as many times'
              ' as there are authors')
@click.pass_context
def upload(ctx,
           username, password, archive=None, metadata=None,
           archive_deposit=False, metadata_deposit=False,
           collection=None, slug=None, partial=False, deposit_id=None,
           replace=False, status=False,
           url='https://deposit.softwareheritage.org/1',
           verbose=False, name=None, author=None):
    """Software Heritage Public Deposit Client

    Create/Update deposit through the command line or access its
    status.

    More documentation can be found at
    https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html.

    """
    # NOTE: click renders the docstring above as this command's --help text,
    # so it is user-facing and deliberately left untouched.
    #
    # Flow: parse/validate the options into a `config` dict, then dispatch on
    # (status, deposit_id) to one of deposit_status / deposit_update /
    # deposit_create, log the result, and always clean temporary files up.
    # Returns 1 after logging an error dict when the input is unusable,
    # None otherwise.
    config = {}

    try:
        logger.debug('Parsing cli options')
        config = client_command_parse_input(
            username, password, archive, metadata, archive_deposit,
            metadata_deposit, collection, slug, partial, deposit_id,
            replace, url, status, name, author)
    except InputError as e:
        msg = 'Problem during parsing options: %s' % e
        r = {
            'error': msg,
        }
        logger.info(r)
        return 1

    try:
        if verbose:
            logger.info("Parsed configuration: %s" % (
                config, ))

        deposit_id = config['deposit_id']

        if status:
            if not deposit_id:
                # Previously no branch matched this combination, leaving `r`
                # unassigned and crashing on the final logger.info(r).
                # NOTE(review): client_command_parse_input may already reject
                # this input upstream; this guard keeps the command safe
                # either way.
                r = {
                    'error': 'Deposit id must be provided for status check',
                }
                logger.info(r)
                return 1
            r = deposit_status(config, logger)
        elif deposit_id:
            r = deposit_update(config, logger)
        else:
            r = deposit_create(config, logger)

        logger.info(r)

    finally:
        # Runs on every exit path of the block above, including early return
        _cleanup_tempfile(config)
class Dbversion(models.Model):
    """Db version entry: a version number together with its release date
    and an optional description (stored in the 'dbversion' table).

    """
    version = models.IntegerField(primary_key=True)
    release = models.DateTimeField(null=True, default=now)
    description = models.TextField(null=True, blank=True)

    class Meta:
        db_table = 'dbversion'

    def __str__(self):
        # Keys are listed in the same order as before so the rendered string
        # is unchanged.
        summary = dict(
            version=self.version,
            release=self.release,
            description=self.description,
        )
        return str(summary)
class Deposit(models.Model):
    """Deposit reception table

    One row per deposit: who sent it (client), where it goes (collection),
    its current status, the identifiers resulting from its loading, and the
    scheduler task ids recorded for its checking and loading.

    """
    id = models.BigAutoField(primary_key=True)

    # First deposit reception date
    reception_date = models.DateTimeField(auto_now_add=True)
    # Date when the deposit is deemed complete and ready for loading
    complete_date = models.DateTimeField(null=True)
    # collection concerned by the deposit
    collection = models.ForeignKey(
        'DepositCollection', models.DO_NOTHING)
    # Deposit's external identifier
    external_id = models.TextField()
    # Deposit client
    client = models.ForeignKey('DepositClient', models.DO_NOTHING)
    # SWH's loading result identifier
    swh_id = models.TextField(blank=True, null=True)
    swh_id_context = models.TextField(blank=True, null=True)
    swh_anchor_id = models.TextField(blank=True, null=True)
    swh_anchor_id_context = models.TextField(blank=True, null=True)
    # Deposit's status regarding loading
    status = models.TextField(
        choices=DEPOSIT_STATUS,
        default=DEPOSIT_STATUS_PARTIAL)
    status_detail = JSONField(null=True)
    # deposit can have one parent
    # on_delete is spelled out: CASCADE is what Django 1.11 applies when the
    # argument is omitted, and the argument is mandatory from Django 2.0 on.
    parent = models.ForeignKey('self', null=True, on_delete=models.CASCADE)
    # Identifier of the scheduler task created to check the deposit
    check_task_id = models.TextField(
        blank=True, null=True,
        verbose_name="Scheduler's associated checking task id"
    )
    # Identifier of the scheduler task created to load the deposit
    load_task_id = models.TextField(
        blank=True, null=True,
        verbose_name="Scheduler's associated loading task id"
    )

    class Meta:
        db_table = 'deposit'

    def __str__(self):
        """Render the deposit as the string form of a summary dict.

        The summary holds the id, reception date, collection name, external
        id, client username and status; 'status_detail' is added only for
        rejected deposits.

        """
        d = {
            'id': self.id,
            'reception_date': self.reception_date,
            'collection': self.collection.name,
            'external_id': self.external_id,
            'client': self.client.username,
            'status': self.status,
        }

        # `(DEPOSIT_STATUS_REJECTED)` is a parenthesized string, not a
        # 1-tuple, so the former `in` test was a substring match against the
        # status text. Compare for equality instead.
        if self.status == DEPOSIT_STATUS_REJECTED:
            d['status_detail'] = self.status_detail

        return str(d)
class DepositCollection(models.Model):
    """Named collection referenced by deposits (stored in the
    'deposit_collection' table).

    """
    id = models.BigAutoField(primary_key=True)
    # Human readable name for the collection type e.g HAL, arXiv, etc...
    name = models.TextField()

    class Meta:
        db_table = 'deposit_collection'

    def __str__(self):
        # Same keys, same order as before: the rendered string is unchanged.
        fields = dict(id=self.id, name=self.name)
        return str(fields)
def schedule_task(scheduler, task):
    """Schedule the task and return its identifier

    Args:
        scheduler: Object whose ``create_tasks`` method is called with a
          one-element list holding the task
        task (dict): Task to schedule

    Returns:
        The 'id' entry of the first task returned by the scheduler, or
        None when the scheduler returned no task

    """
    tasks = scheduler.create_tasks([task])
    if not tasks:
        # Nothing was created: make the "no identifier" outcome explicit
        return None
    return tasks[0]['id']


@receiver(post_save, sender=Deposit)
def post_deposit_save(sender, instance, created, raw, using,
                      update_fields, **kwargs):
    """When a deposit is saved, check for the deposit's status change and
       schedule actions accordingly.

       When the status passes to deposited, schedule checks.
       When the status pass to ready, schedule loading.  Otherwise, do
       nothing.

       The identifier of the scheduled task is stored on the deposit
       (check_task_id or load_task_id) and the deposit is saved again.

    Args:
        sender (Deposit): The model class
        instance (Deposit): The actual instance being saved
        created (bool): True if a new record was created
        raw (bool): True if the model is saved exactly as presented
                    (i.e. when loading a fixture). One should not
                    query/modify other records in the database as the
                    database might not be in a consistent state yet
        using: The database alias being used
        update_fields: The set of fields to update as passed to
                       Model.save(), or None if update_fields wasn’t
                       passed to save()

    """
    default_config = SWHDefaultConfig()
    if not default_config.config['checks']:
        return

    if instance.status not in {DEPOSIT_STATUS_DEPOSITED,
                               DEPOSIT_STATUS_VERIFIED}:
        return

    from django.urls import reverse
    from swh.scheduler.utils import create_oneshot_task_dict

    args = [instance.collection.name, instance.id]

    # In the following, we are checking the instance.*task_id are not already
    # populated because the `instance.save()` call will also trigger a call to
    # that very function.

    if (instance.status == DEPOSIT_STATUS_DEPOSITED and
            not instance.check_task_id):
        # schedule deposit's checks
        from swh.deposit.config import PRIVATE_CHECK_DEPOSIT
        check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args)
        task = create_oneshot_task_dict(
            'swh-deposit-archive-checks',
            deposit_check_url=check_url)
        check_task_id = schedule_task(default_config.scheduler, task)
        # Only persist (and thus re-enter this receiver) when an identifier
        # was really obtained. Saving with an empty id would leave the guard
        # above open and re-schedule on every nested post_save call.
        if check_task_id:
            instance.check_task_id = check_task_id
            instance.save()

    elif (instance.status == DEPOSIT_STATUS_VERIFIED and
          not instance.load_task_id):
        # schedule deposit loading
        from swh.deposit.config import PRIVATE_GET_RAW_CONTENT
        from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA
        from swh.deposit.config import PRIVATE_PUT_DEPOSIT
        archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
        meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
        update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args)

        task = create_oneshot_task_dict(
            'swh-deposit-archive-loading',
            archive_url=archive_url,
            deposit_meta_url=meta_url,
            deposit_update_url=update_url)

        load_task_id = schedule_task(default_config.scheduler, task)
        # Same reasoning as for the check task: no id, no nested save.
        if load_task_id:
            instance.load_task_id = load_task_id
            instance.save()