diff --git a/PKG-INFO b/PKG-INFO index 9ae16c24..5e3d8fa8 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,35 +1,35 @@ Metadata-Version: 2.1 Name: swh.deposit -Version: 0.0.78 +Version: 0.0.79 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Description: # swh-deposit This is [Software Heritage](https://www.softwareheritage.org)'s [SWORD 2.0](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) Server implementation, as well as a simple client to upload deposits on the server. **S.W.O.R.D** (**S**imple **W**eb-Service **O**ffering **R**epository **D**eposit) is an interoperability standard for digital file deposit. This implementation will permit interaction between a client (a repository) and a server (SWH repository) to permit deposits of software source code archives and associated metadata. 
The documentation is at ./docs/README-specification.md Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: server diff --git a/requirements-test.txt b/requirements-test.txt index b5e4fdab..2e64c384 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,6 +1,7 @@ pytest pytest-django +pytest-mock swh.scheduler[testing] pytest-postgresql >= 2.1.0 requests_mock -django-stubs +django-stubs < 1.3.0 diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO index 9ae16c24..5e3d8fa8 100644 --- a/swh.deposit.egg-info/PKG-INFO +++ b/swh.deposit.egg-info/PKG-INFO @@ -1,35 +1,35 @@ Metadata-Version: 2.1 Name: swh.deposit -Version: 0.0.78 +Version: 0.0.79 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Description: # swh-deposit This is [Software Heritage](https://www.softwareheritage.org)'s [SWORD 2.0](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) Server implementation, as well as a simple client to upload deposits on the server. **S.W.O.R.D** (**S**imple **W**eb-Service **O**ffering **R**epository **D**eposit) is an interoperability standard for digital file deposit. This implementation will permit interaction between a client (a repository) and a server (SWH repository) to permit deposits of software source code archives and associated metadata. 
The documentation is at ./docs/README-specification.md Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: server diff --git a/swh.deposit.egg-info/SOURCES.txt b/swh.deposit.egg-info/SOURCES.txt index 5ec1f4a2..27c9c6c4 100644 --- a/swh.deposit.egg-info/SOURCES.txt +++ b/swh.deposit.egg-info/SOURCES.txt @@ -1,153 +1,169 @@ MANIFEST.in Makefile README.md requirements-server.txt requirements-swh-server.txt requirements-swh.txt requirements-test.txt requirements.txt setup.py version.txt swh/__init__.py swh.deposit.egg-info/PKG-INFO swh.deposit.egg-info/SOURCES.txt swh.deposit.egg-info/dependency_links.txt swh.deposit.egg-info/entry_points.txt swh.deposit.egg-info/requires.txt swh.deposit.egg-info/top_level.txt swh/deposit/__init__.py swh/deposit/apps.py swh/deposit/auth.py swh/deposit/config.py swh/deposit/errors.py +swh/deposit/gunicorn_config.py swh/deposit/manage.py swh/deposit/models.py swh/deposit/parsers.py swh/deposit/py.typed swh/deposit/signals.py swh/deposit/urls.py swh/deposit/utils.py swh/deposit/api/__init__.py swh/deposit/api/common.py swh/deposit/api/converters.py swh/deposit/api/deposit.py swh/deposit/api/deposit_content.py swh/deposit/api/deposit_status.py swh/deposit/api/deposit_update.py swh/deposit/api/service_document.py swh/deposit/api/urls.py swh/deposit/api/private/__init__.py swh/deposit/api/private/deposit_check.py swh/deposit/api/private/deposit_list.py swh/deposit/api/private/deposit_read.py swh/deposit/api/private/deposit_update_status.py swh/deposit/api/private/urls.py swh/deposit/cli/__init__.py swh/deposit/cli/admin.py swh/deposit/cli/client.py swh/deposit/client/__init__.py swh/deposit/fixtures/__init__.py 
swh/deposit/fixtures/deposit_data.yaml swh/deposit/loader/__init__.py swh/deposit/loader/checker.py -swh/deposit/loader/loader.py swh/deposit/loader/tasks.py swh/deposit/migrations/0001_initial.py swh/deposit/migrations/0002_depositrequest_archive.py swh/deposit/migrations/0003_temporaryarchive.py swh/deposit/migrations/0004_delete_temporaryarchive.py swh/deposit/migrations/0005_auto_20171019_1436.py swh/deposit/migrations/0006_depositclient_url.py swh/deposit/migrations/0007_auto_20171129_1609.py swh/deposit/migrations/0008_auto_20171130_1513.py swh/deposit/migrations/0009_deposit_parent.py swh/deposit/migrations/0010_auto_20180110_0953.py swh/deposit/migrations/0011_auto_20180115_1510.py swh/deposit/migrations/0012_deposit_status_detail.py swh/deposit/migrations/0013_depositrequest_raw_metadata.py swh/deposit/migrations/0014_auto_20180720_1221.py swh/deposit/migrations/0015_depositrequest_typemigration.py swh/deposit/migrations/0016_auto_20190507_1408.py swh/deposit/migrations/0017_auto_20190925_0906.py swh/deposit/migrations/__init__.py swh/deposit/settings/__init__.py swh/deposit/settings/common.py swh/deposit/settings/development.py swh/deposit/settings/production.py swh/deposit/settings/testing.py swh/deposit/static/robots.txt swh/deposit/static/css/bootstrap-responsive.min.css swh/deposit/static/css/style.css swh/deposit/static/img/arrow-up-small.png swh/deposit/static/img/swh-logo-deposit.png swh/deposit/static/img/swh-logo-deposit.svg swh/deposit/static/img/icons/swh-logo-32x32.png swh/deposit/static/img/icons/swh-logo-deposit-180x180.png swh/deposit/static/img/icons/swh-logo-deposit-192x192.png swh/deposit/static/img/icons/swh-logo-deposit-270x270.png swh/deposit/templates/__init__.py +swh/deposit/templates/api.html swh/deposit/templates/homepage.html swh/deposit/templates/layout.html swh/deposit/templates/deposit/__init__.py swh/deposit/templates/deposit/content.xml swh/deposit/templates/deposit/deposit_receipt.xml swh/deposit/templates/deposit/error.xml 
swh/deposit/templates/deposit/service_document.xml swh/deposit/templates/deposit/status.xml swh/deposit/templates/rest_framework/api.html swh/deposit/tests/__init__.py swh/deposit/tests/common.py swh/deposit/tests/conftest.py swh/deposit/tests/test_common.py +swh/deposit/tests/test_gunicorn_config.py swh/deposit/tests/test_utils.py swh/deposit/tests/api/__init__.py swh/deposit/tests/api/conftest.py swh/deposit/tests/api/test_converters.py swh/deposit/tests/api/test_deposit.py swh/deposit/tests/api/test_deposit_atom.py swh/deposit/tests/api/test_deposit_binary.py swh/deposit/tests/api/test_deposit_delete.py swh/deposit/tests/api/test_deposit_list.py swh/deposit/tests/api/test_deposit_multipart.py swh/deposit/tests/api/test_deposit_private_check.py swh/deposit/tests/api/test_deposit_private_read_archive.py swh/deposit/tests/api/test_deposit_private_read_metadata.py swh/deposit/tests/api/test_deposit_private_update_status.py swh/deposit/tests/api/test_deposit_status.py swh/deposit/tests/api/test_deposit_update.py swh/deposit/tests/api/test_parser.py swh/deposit/tests/api/test_service_document.py swh/deposit/tests/api/data/atom/codemeta-sample.xml swh/deposit/tests/api/data/atom/entry-data-badly-formatted.xml swh/deposit/tests/api/data/atom/entry-data-deposit-binary.xml swh/deposit/tests/api/data/atom/entry-data-empty-body.xml swh/deposit/tests/api/data/atom/entry-data-ko.xml swh/deposit/tests/api/data/atom/entry-data-minimal.xml swh/deposit/tests/api/data/atom/entry-data-parsing-error-prone.xml swh/deposit/tests/api/data/atom/entry-data0.xml swh/deposit/tests/api/data/atom/entry-data1.xml swh/deposit/tests/api/data/atom/entry-data2.xml swh/deposit/tests/api/data/atom/entry-data3.xml swh/deposit/tests/api/data/atom/entry-update-in-place.xml swh/deposit/tests/api/data/atom/error-with-decimal.xml swh/deposit/tests/api/data/atom/metadata.xml swh/deposit/tests/api/data/atom/tei-sample.xml swh/deposit/tests/cli/__init__.py swh/deposit/tests/cli/test_client.py 
+swh/deposit/tests/cli/data/atom/codemeta-sample.xml +swh/deposit/tests/cli/data/atom/entry-data-badly-formatted.xml +swh/deposit/tests/cli/data/atom/entry-data-deposit-binary.xml +swh/deposit/tests/cli/data/atom/entry-data-empty-body.xml +swh/deposit/tests/cli/data/atom/entry-data-ko.xml +swh/deposit/tests/cli/data/atom/entry-data-minimal.xml +swh/deposit/tests/cli/data/atom/entry-data-parsing-error-prone.xml +swh/deposit/tests/cli/data/atom/entry-data0.xml +swh/deposit/tests/cli/data/atom/entry-data1.xml +swh/deposit/tests/cli/data/atom/entry-data2.xml +swh/deposit/tests/cli/data/atom/entry-data3.xml +swh/deposit/tests/cli/data/atom/entry-update-in-place.xml +swh/deposit/tests/cli/data/atom/error-with-decimal.xml +swh/deposit/tests/cli/data/atom/metadata.xml +swh/deposit/tests/cli/data/atom/tei-sample.xml swh/deposit/tests/loader/__init__.py swh/deposit/tests/loader/common.py swh/deposit/tests/loader/conftest.py swh/deposit/tests/loader/test_checker.py swh/deposit/tests/loader/test_client.py -swh/deposit/tests/loader/test_loader.py swh/deposit/tests/loader/test_tasks.py swh/deposit/tests/loader/data/http_example.org/hello.json swh/deposit/tests/loader/data/http_example.org/hello_you swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_1_check swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_2_check swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_meta swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_raw swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_update swh/deposit/tests/loader/data/https_nowhere.org/1_private_test_1_check swh/deposit/tests/loader/data/https_nowhere.org/1_private_test_1_metadata swh/deposit/tests/loader/data/https_nowhere.org/1_private_test_1_raw \ No newline at end of file diff --git a/swh.deposit.egg-info/requires.txt b/swh.deposit.egg-info/requires.txt index c6857e04..79807d04 
100644 --- a/swh.deposit.egg-info/requires.txt +++ b/swh.deposit.egg-info/requires.txt @@ -1,30 +1,31 @@ vcversioner click xmltodict iso8601 requests swh.core>=0.0.75 [server] Django<2.0 djangorestframework swh.core[http] swh.loader.tar>=0.0.39 swh.loader.core>=0.0.43 swh.scheduler>=0.0.39 swh.model>=0.0.26 [testing] pytest pytest-django +pytest-mock swh.scheduler[testing] pytest-postgresql>=2.1.0 requests_mock -django-stubs +django-stubs<1.3.0 Django<2.0 djangorestframework swh.core[http] swh.loader.tar>=0.0.39 swh.loader.core>=0.0.43 swh.scheduler>=0.0.39 swh.model>=0.0.26 diff --git a/swh/deposit/api/urls.py b/swh/deposit/api/urls.py index cc516092..b65f0be0 100644 --- a/swh/deposit/api/urls.py +++ b/swh/deposit/api/urls.py @@ -1,57 +1,64 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -"""swh URL Configuration +"""SWH's deposit api URL Configuration """ from django.conf.urls import url +from django.shortcuts import render from ..config import EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI from ..config import SD_IRI, COL_IRI, STATE_IRI from .deposit import SWHDeposit from .deposit_status import SWHDepositStatus from .deposit_update import SWHUpdateMetadataDeposit from .deposit_update import SWHUpdateArchiveDeposit from .deposit_content import SWHDepositContent from .service_document import SWHServiceDocument -urlpatterns = [ - # PUBLIC API +def api_view(req): + return render(req, "api.html") + + +# PUBLIC API +urlpatterns = [ + # simple view on the api + url(r'^$', api_view, name='api'), # SD IRI - Service Document IRI # -> GET url(r'^servicedocument/', SWHServiceDocument.as_view(), name=SD_IRI), # Col IRI - Collection IRI # -> POST url(r'^(?P[^/]+)/$', SWHDeposit.as_view(), name=COL_IRI), # EM IRI - Atom Edit 
Media IRI (update archive IRI) # -> PUT (update-in-place existing archive) # -> POST (add new archive) url(r'^(?P[^/]+)/(?P[^/]+)/media/$', SWHUpdateArchiveDeposit.as_view(), name=EM_IRI), # Edit IRI - Atom Entry Edit IRI (update metadata IRI) # SE IRI - Sword Edit IRI ;; possibly same as Edit IRI # -> PUT (update in place) # -> POST (add new metadata) url(r'^(?P[^/]+)/(?P[^/]+)/metadata/$', SWHUpdateMetadataDeposit.as_view(), name=EDIT_SE_IRI), # State IRI # -> GET url(r'^(?P[^/]+)/(?P[^/]+)/status/$', SWHDepositStatus.as_view(), name=STATE_IRI), # Cont/File IRI # -> GET url(r'^(?P[^/]+)/(?P[^/]+)/content/$', SWHDepositContent.as_view(), name=CONT_FILE_IRI), # specification is not clear about # FILE-IRI, we assume it's the same as # the CONT-IRI one ] diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py index 2962f636..8cc0f609 100644 --- a/swh/deposit/cli/client.py +++ b/swh/deposit/cli/client.py @@ -1,394 +1,381 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import logging +import sys import tempfile import uuid import click import xmltodict from swh.deposit.client import PublicApiDepositClient from swh.deposit.cli import deposit logger = logging.getLogger(__name__) class InputError(ValueError): """Input script error """ pass def generate_slug(): """Generate a slug (sample purposes). """ return str(uuid.uuid4()) def _url(url): """Force the /1 api version at the end of the url (avoiding confusing issues without it). 
Args: url (str): api url used by cli users Returns: Top level api url to actually request """ if not url.endswith('/1'): url = '%s/1' % url return url -def generate_metadata_file(name, external_id, authors): +def generate_metadata_file(name, external_id, authors, temp_dir): """Generate a temporary metadata file with the minimum required metadata This generates a xml file in a temporary location and returns the path to that file. This is up to the client of that function to clean up the temporary file. Args: name (str): Software's name external_id (str): External identifier (slug) or generated one authors (List[str]): List of author names Returns: Filepath to the metadata generated file """ - _, tmpfile = tempfile.mkstemp(prefix='swh.deposit.cli.') - + path = os.path.join(temp_dir, 'metadata.xml') # generate a metadata file with the minimum required metadata codemetadata = { 'entry': { '@xmlns': "http://www.w3.org/2005/Atom", '@xmlns:codemeta': "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0", 'codemeta:name': name, 'codemeta:identifier': external_id, 'codemeta:author': [{ 'codemeta:name': author_name } for author_name in authors], }, } - logging.debug('Temporary file: %s', tmpfile) + logging.debug('Temporary file: %s', path) logging.debug('Metadata dict to generate as xml: %s', codemetadata) s = xmltodict.unparse(codemetadata, pretty=True) logging.debug('Metadata dict as xml generated: %s', s) - with open(tmpfile, 'w') as fp: + with open(path, 'w') as fp: fp.write(s) - return tmpfile - - -def _cleanup_tempfile(config): - """Clean up the temporary metadata file generated. 
- - Args: - - config (Dict): A configuration dict with 2 important keys for - that routine, 'cleanup_tempfile' (bool) and 'metadata' (path - to eventually clean up) - - """ - if config['cleanup_tempfile']: - path = config['metadata'] - if os.path.exists(path): - os.unlink(path) + return path def _client(url, username, password): """Instantiate a client to access the deposit api server Args: url (str): Deposit api server username (str): User password (str): User's password """ client = PublicApiDepositClient({ 'url': url, 'auth': { 'username': username, 'password': password }, }) return client def _collection(client): """Retrieve the client's collection """ # retrieve user's collection sd_content = client.service_document() if 'error' in sd_content: raise InputError('Service document retrieval: %s' % ( sd_content['error'], )) collection = sd_content[ 'service']['workspace']['collection']['sword:name'] return collection def client_command_parse_input( username, password, archive, metadata, archive_deposit, metadata_deposit, collection, slug, partial, deposit_id, replace, - url, name, authors): + url, name, authors, temp_dir): """Parse the client subcommand options and make sure the combination is acceptable*. If not, an InputError exception is raised explaining the issue. By acceptable, we mean: - A multipart deposit (create or update) requires: - an existing software archive - an existing metadata file or author(s) and name provided in params - A binary deposit (create/update) requires an existing software archive - A metadata deposit (create/update) requires an existing metadata file or author(s) and name provided in params - A deposit update requires a deposit_id This will not prevent all failure cases though. The remaining errors are already dealt with by the underlying api client. 
Raises: InputError explaining the issue Returns: dict with the following keys: 'archive': the software archive to deposit 'username': username 'password': associated password 'metadata': the metadata file to deposit 'collection': the username's associated client 'slug': the slug or external id identifying the deposit to make 'partial': if the deposit is partial or not 'client': instantiated class 'url': deposit's server main entry point 'deposit_type': deposit's type (binary, multipart, metadata) 'deposit_id': optional deposit identifier """ - cleanup_tempfile = False - - try: - if archive_deposit and metadata_deposit: - # too many flags use, remove redundant ones (-> multipart deposit) - archive_deposit = False - metadata_deposit = False + if archive_deposit and metadata_deposit: + # too many flags use, remove redundant ones (-> multipart deposit) + archive_deposit = False + metadata_deposit = False + + if not slug: # generate one as this is mandatory + slug = generate_slug() + + if not metadata: + if name and authors: + metadata = generate_metadata_file(name, slug, authors, temp_dir) + elif not archive_deposit and not partial and not deposit_id: + # If we meet all the following conditions: + # * there is not an archive-only deposit + # * it is not part of a multipart deposit (either create/update + # or finish) + # * it misses either name or authors + raise InputError( + "Either a metadata file (--metadata) or both --author and " + "--name must be provided, unless this is an archive-only " + "deposit.") + elif name or authors: + # If we are generating metadata, then all mandatory metadata + # must be present + raise InputError( + "Either a metadata file (--metadata) or both --author and " + "--name must be provided.") + else: + # TODO: this is a multipart deposit, we might want to check that + # metadata are deposited at some point + pass + elif name or authors: + raise InputError( + "Using a metadata file (--metadata) is incompatible with " + "--author and 
--name, which are used to generate one.") - if not slug: # generate one as this is mandatory - slug = generate_slug() + if metadata_deposit: + archive = None - if not metadata and name and authors: - metadata = generate_metadata_file(name, slug, authors) - cleanup_tempfile = True + if archive_deposit: + metadata = None - if metadata_deposit: - archive = None + if metadata_deposit and not metadata: + raise InputError( + "Metadata deposit must be provided for metadata " + "deposit (either a filepath or --name and --author)") - if archive_deposit: - metadata = None + if not archive and not metadata and partial: + raise InputError( + 'Please provide an actionable command. See --help for more ' + 'information') - if metadata_deposit and not metadata: - raise InputError( - "Metadata deposit must be provided for metadata " - "deposit (either a filepath or --name and --author)") + if replace and not deposit_id: + raise InputError( + 'To update an existing deposit, you must provide its id') - if not archive and not metadata: - raise InputError( - 'Please provide an actionable command. 
See --help for more ' - 'information') + client = _client(url, username, password) - if replace and not deposit_id: - raise InputError( - 'To update an existing deposit, you must provide its id') - - client = _client(url, username, password) - - if not collection: - collection = _collection(client) + if not collection: + collection = _collection(client) - return { - 'archive': archive, - 'username': username, - 'password': password, - 'metadata': metadata, - 'cleanup_tempfile': cleanup_tempfile, - 'collection': collection, - 'slug': slug, - 'in_progress': partial, - 'client': client, - 'url': url, - 'deposit_id': deposit_id, - 'replace': replace, - } - except Exception: # to be clean, cleanup prior to raise - _cleanup_tempfile({ - 'cleanup_tempfile': cleanup_tempfile, - 'metadata': metadata - }) - raise + return { + 'archive': archive, + 'username': username, + 'password': password, + 'metadata': metadata, + 'collection': collection, + 'slug': slug, + 'in_progress': partial, + 'client': client, + 'url': url, + 'deposit_id': deposit_id, + 'replace': replace, + } def _subdict(d, keys): 'return a dict from d with only given keys' return {k: v for k, v in d.items() if k in keys} def deposit_create(config, logger): """Delegate the actual deposit to the deposit client. """ logger.debug('Create deposit') client = config['client'] keys = ('collection', 'archive', 'metadata', 'slug', 'in_progress') return client.deposit_create( **_subdict(config, keys)) def deposit_update(config, logger): """Delegate the actual deposit to the deposit client. 
""" logger.debug('Update deposit') client = config['client'] keys = ('collection', 'deposit_id', 'archive', 'metadata', 'slug', 'in_progress', 'replace') return client.deposit_update( **_subdict(config, keys)) @deposit.command() @click.option('--username', required=True, help="(Mandatory) User's name") @click.option('--password', required=True, help="(Mandatory) User's associated password") @click.option('--archive', type=click.Path(exists=True), help='(Optional) Software archive to deposit') @click.option('--metadata', type=click.Path(exists=True), help="(Optional) Path to xml metadata file. If not provided, this will use a file named .metadata.xml") # noqa @click.option('--archive-deposit/--no-archive-deposit', default=False, help='(Optional) Software archive only deposit') @click.option('--metadata-deposit/--no-metadata-deposit', default=False, help='(Optional) Metadata only deposit') @click.option('--collection', help="(Optional) User's collection. If not provided, this will be fetched.") # noqa @click.option('--slug', help="""(Optional) External system information identifier. If not provided, it will be generated""") # noqa @click.option('--partial/--no-partial', default=False, help='(Optional) The deposit will be partial, other deposits will have to take place to finalize it.') # noqa @click.option('--deposit-id', default=None, help='(Optional) Update an existing partial deposit with its identifier') # noqa @click.option('--replace/--no-replace', default=False, help='(Optional) Update by replacing existing metadata to a deposit') # noqa @click.option('--url', default='https://deposit.softwareheritage.org', help="(Optional) Deposit server api endpoint. 
By default, https://deposit.softwareheritage.org/1") # noqa @click.option('--verbose/--no-verbose', default=False, help='Verbose mode') @click.option('--name', help='Software name') @click.option('--author', multiple=True, help='Software author(s), this can be repeated as many times' ' as there are authors') @click.pass_context def upload(ctx, username, password, archive=None, metadata=None, archive_deposit=False, metadata_deposit=False, collection=None, slug=None, partial=False, deposit_id=None, replace=False, url='https://deposit.softwareheritage.org', verbose=False, name=None, author=None): """Software Heritage Public Deposit Client Create/Update deposit through the command line. More documentation can be found at https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html. """ url = _url(url) config = {} - try: - logger.debug('Parsing cli options') - config = client_command_parse_input( - username, password, archive, metadata, archive_deposit, - metadata_deposit, collection, slug, partial, deposit_id, - replace, url, name, author) - except InputError as e: - msg = 'Problem during parsing options: %s' % e - r = { - 'error': msg, - } - logger.info(r) - return 1 + with tempfile.TemporaryDirectory() as temp_dir: + try: + logger.debug('Parsing cli options') + config = client_command_parse_input( + username, password, archive, metadata, archive_deposit, + metadata_deposit, collection, slug, partial, deposit_id, + replace, url, name, author, temp_dir) + except InputError as e: + logger.error('Problem during parsing options: %s', e) + sys.exit(1) - try: if verbose: logger.info("Parsed configuration: %s" % ( config, )) deposit_id = config['deposit_id'] if deposit_id: r = deposit_update(config, logger) else: r = deposit_create(config, logger) logger.info(r) - finally: - _cleanup_tempfile(config) - @deposit.command() @click.option('--url', default='https://deposit.softwareheritage.org', help="(Optional) Deposit server api endpoint. 
By default, " "https://deposit.softwareheritage.org/1") @click.option('--username', required=True, help="(Mandatory) User's name") @click.option('--password', required=True, help="(Mandatory) User's associated password") @click.option('--deposit-id', default=None, required=True, help="Deposit identifier.") @click.pass_context def status(ctx, url, username, password, deposit_id): """Deposit's status """ url = _url(url) logger.debug('Status deposit') try: client = _client(url, username, password) collection = _collection(client) except InputError as e: - msg = 'Problem during parsing options: %s' % e - r = { - 'error': msg, - } - logger.info(r) - return 1 + logger.error('Problem during parsing options: %s', e) + sys.exit(1) r = client.deposit_status( collection=collection, deposit_id=deposit_id) logger.info(r) diff --git a/swh/deposit/gunicorn_config.py b/swh/deposit/gunicorn_config.py new file mode 100644 index 00000000..9c9a5b5c --- /dev/null +++ b/swh/deposit/gunicorn_config.py @@ -0,0 +1,14 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from sentry_sdk.integrations.django import DjangoIntegration + +from swh.core.api.gunicorn_config import * # noqa +from swh.core.api.gunicorn_config import post_fork as _post_fork + + +def post_fork(server, worker): # type: ignore + _post_fork(server, worker, + flask=False, sentry_integrations=[DjangoIntegration()]) diff --git a/swh/deposit/loader/loader.py b/swh/deposit/loader/loader.py deleted file mode 100644 index af12adf0..00000000 --- a/swh/deposit/loader/loader.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (C) 2017-2018 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE 
file for more information - -import os -import tempfile - -from swh.model import hashutil -from swh.loader.tar import loader -from swh.loader.core.loader import BufferedLoader - -from ..client import PrivateApiDepositClient - - -class DepositLoader(loader.LegacyLocalTarLoader): - """Deposit loader implementation. - - This is a subclass of the :class:TarLoader as the main goal of - this class is to first retrieve the deposit's tarball contents as - one and its associated metadata. Then provide said tarball to be - loaded by the TarLoader. - - This will: - - - retrieves the deposit's archive locally - - provide the archive to be loaded by the tar loader - - clean up the temporary location used to retrieve the archive locally - - update the deposit's status accordingly - - """ - CONFIG_BASE_FILENAME = 'loader/deposit' - - ADDITIONAL_CONFIG = { - 'extraction_dir': ('str', '/tmp/swh.deposit.loader/'), - } - - visit_type = 'deposit' - - def __init__(self, client=None): - super().__init__( - logging_class='swh.deposit.loader.loader.DepositLoader') - self.deposit_client = client if client else PrivateApiDepositClient() - - def load(self, *, archive_url, deposit_meta_url, deposit_update_url): - return BufferedLoader.load( - self, - archive_url=archive_url, - deposit_meta_url=deposit_meta_url, - deposit_update_url=deposit_update_url) - - def prepare_origin_visit(self, *, deposit_meta_url, **kwargs): - self.metadata = self.deposit_client.metadata_get( - deposit_meta_url) - self.origin = self.metadata['origin'] - self.visit_date = None - - def prepare(self, *, archive_url, deposit_meta_url, deposit_update_url): - """Prepare the loading by first retrieving the deposit's raw archive - content. 
- - """ - self.deposit_update_url = deposit_update_url - self.deposit_client.status_update(deposit_update_url, 'loading') - - temporary_directory = tempfile.TemporaryDirectory() - self.temporary_directory = temporary_directory - archive_path = os.path.join(temporary_directory.name, 'archive.zip') - archive = self.deposit_client.archive_get( - archive_url, archive_path) - - metadata = self.metadata - revision = metadata['revision'] - branch_name = metadata['branch_name'] - self.origin_metadata = metadata['origin_metadata'] - self.prepare_metadata() - - super().prepare(tar_path=archive, - origin=self.origin, - revision=revision, - branch_name=branch_name) - - def store_metadata(self): - """Storing the origin_metadata during the load processus. - - Provider_id and tool_id are resolved during the prepare() method. - - """ - visit_date = self.visit_date - provider_id = self.origin_metadata['provider']['provider_id'] - tool_id = self.origin_metadata['tool']['tool_id'] - metadata = self.origin_metadata['metadata'] - try: - self.send_origin_metadata(visit_date, provider_id, - tool_id, metadata) - except Exception: - self.log.exception('Problem when storing origin_metadata') - raise - - def post_load(self, success=True): - """Updating the deposit's status according to its loading status. - - If not successful, we update its status to 'failed'. - Otherwise, we update its status to 'done' and pass along its - associated revision. 
- - """ - try: - if not success: - self.deposit_client.status_update(self.deposit_update_url, - status='failed') - return - - revisions = self.objects['revision'] - # Retrieve the revision - [rev_id] = revisions.keys() - rev = revisions[rev_id] - if rev_id: - rev_id = hashutil.hash_to_hex(rev_id) - - dir_id = rev['directory'] - if dir_id: - dir_id = hashutil.hash_to_hex(dir_id) - - # update the deposit's status to success with its - # revision-id and directory-id - self.deposit_client.status_update( - self.deposit_update_url, - status='done', - revision_id=rev_id, - directory_id=dir_id, - origin_url=self.origin['url']) - except Exception: - self.log.exception( - 'Problem when trying to update the deposit\'s status') - - def cleanup(self): - """Clean up temporary directory where we retrieved the tarball. - - """ - super().cleanup() - self.temporary_directory.cleanup() diff --git a/swh/deposit/loader/tasks.py b/swh/deposit/loader/tasks.py index b075addb..7bc23248 100644 --- a/swh/deposit/loader/tasks.py +++ b/swh/deposit/loader/tasks.py @@ -1,38 +1,18 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from celery import shared_task -from swh.deposit.loader.loader import DepositLoader from swh.deposit.loader.checker import DepositChecker -@shared_task(name=__name__ + '.LoadDepositArchiveTsk') -def load_deposit(archive_url, deposit_meta_url, deposit_update_url): - """Deposit archive loading task described by the following steps: - - 1. Retrieve tarball from deposit's private api and store - locally in a temporary directory - 2. Trigger the loading - 3. clean up the temporary directory - 4. 
Update the deposit's status according to result using the - deposit's private update status api - - """ - loader = DepositLoader() - return loader.load( - archive_url=archive_url, - deposit_meta_url=deposit_meta_url, - deposit_update_url=deposit_update_url) - - @shared_task(name=__name__ + '.ChecksDepositTsk') def check_deposit(deposit_check_url): """Check a deposit's status Args: see :func:`DepositChecker.check`. """ checker = DepositChecker() return checker.check(deposit_check_url) diff --git a/swh/deposit/signals.py b/swh/deposit/signals.py index d3396de7..fa43da00 100644 --- a/swh/deposit/signals.py +++ b/swh/deposit/signals.py @@ -1,119 +1,104 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of defining some uncoupled actions on deposit. Typically, checking that the archives deposited are ok are not directly testing in the request/answer to avoid too long computations. So this is done in the deposit_on_status_ready_for_check callback. """ from swh.deposit import utils from django.db.models.signals import post_save from django.dispatch import receiver from .models import Deposit from .config import SWHDefaultConfig, DEPOSIT_STATUS_VERIFIED from .config import DEPOSIT_STATUS_DEPOSITED def schedule_task(scheduler, task): """Schedule the task and return its identifier Args: task (dict): Task to schedule Returns: The task identifier """ tasks = scheduler.create_tasks([task]) if tasks: created_task = tasks[0] return created_task['id'] @receiver(post_save, sender=Deposit) def post_deposit_save(sender, instance, created, raw, using, update_fields, **kwargs): """When a deposit is saved, check for the deposit's status change and schedule actions accordingly. When the status passes to deposited, schedule checks. 
When the status pass to ready, schedule loading. Otherwise, do nothing. Args: sender (Deposit): The model class instance (Deposit): The actual instance being saved created (bool): True if a new record was created raw (bool): True if the model is saved exactly as presented (i.e. when loading a fixture). One should not query/modify other records in the database as the database might not be in a consistent state yet using: The database alias being used update_fields: The set of fields to update as passed to Model.save(), or None if update_fields wasn’t passed to save() """ default_config = SWHDefaultConfig() if not default_config.config['checks']: return if instance.status not in {DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_VERIFIED}: return from django.urls import reverse from swh.scheduler.utils import create_oneshot_task_dict args = [instance.collection.name, instance.id] # In the following, we are checking the instance.*task_id are not already # populated because the `instance.save()` call will also trigger a call to # that very function. 
if (instance.status == DEPOSIT_STATUS_DEPOSITED and not instance.check_task_id): # schedule deposit's checks from swh.deposit.config import PRIVATE_CHECK_DEPOSIT check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args) task = create_oneshot_task_dict('check-deposit', deposit_check_url=check_url) check_task_id = schedule_task(default_config.scheduler, task) instance.check_task_id = check_task_id instance.save() elif (instance.status == DEPOSIT_STATUS_VERIFIED and not instance.load_task_id): - version = default_config.config.get('loader-version', 1) - # schedule deposit loading - if int(version) == 1: # default version - from swh.deposit.config import PRIVATE_GET_RAW_CONTENT - from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA - from swh.deposit.config import PRIVATE_PUT_DEPOSIT - archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) - meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) - update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) - - task = create_oneshot_task_dict('load-deposit', - archive_url=archive_url, - deposit_meta_url=meta_url, - deposit_update_url=update_url) - else: # new version - url = utils.origin_url_from(instance) - task = create_oneshot_task_dict( - 'load-deposit', - url=url, deposit_id=instance.id) + url = utils.origin_url_from(instance) + task = create_oneshot_task_dict( + 'load-deposit', + url=url, deposit_id=instance.id) load_task_id = schedule_task(default_config.scheduler, task) instance.load_task_id = load_task_id instance.save() diff --git a/swh/deposit/templates/api.html b/swh/deposit/templates/api.html new file mode 100644 index 00000000..77dd1773 --- /dev/null +++ b/swh/deposit/templates/api.html @@ -0,0 +1,17 @@ +{% extends "layout.html" %} +{% load static %} +{% block title %}The Software Heritage Deposit Api{% endblock %} + +{% block content %} + +

Documentation

+ +

+

+

+ +{% endblock %} diff --git a/swh/deposit/templates/homepage.html b/swh/deposit/templates/homepage.html index 7d6c3316..063966b1 100644 --- a/swh/deposit/templates/homepage.html +++ b/swh/deposit/templates/homepage.html @@ -1,45 +1,42 @@ {% extends "layout.html" %} {% load static %} {% block title %}The Software Heritage Deposit{% endblock %} {% block content %}

Welcome to the Software Heritage deposit

Overview

The long term goal of the Software Heritage initiative is to collect all publicly available software in source code form together with its development history, replicate it massively to ensure its preservation, and share it with everyone who needs it.

What's the deposit?

This is Software Heritage's SWORD 2.0 Server implementation. S.W.O.R.D (Simple Web-Service Offering Repository Deposit) is an interoperability standard for digital file deposit. -This implementation will permit interaction between a client (a -repository, e.g. hal) and a server -(SWH repository) to -permit deposits of software source code archives with associated -metadata. - +This implementation allows a client (a repository, e.g. hal) to deposit +software source archives and their associated metadata to the SWH archive.

Documentation

{% endblock %} diff --git a/swh/deposit/tests/__init__.py b/swh/deposit/tests/__init__.py index f8ec3db7..2b34b26e 100644 --- a/swh/deposit/tests/__init__.py +++ b/swh/deposit/tests/__init__.py @@ -1,73 +1,42 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.deposit.config import setup_django_for from swh.deposit.config import SWHDefaultConfig # noqa -from swh.loader.core.loader import BufferedLoader - TEST_CONFIG = { 'max_upload_size': 500, 'extraction_dir': '/tmp/swh-deposit/test/extraction-dir', 'checks': False, 'provider': { 'provider_name': '', 'provider_type': 'deposit_client', 'provider_url': '', 'metadata': { } }, 'tool': { 'name': 'swh-deposit', 'version': '0.0.1', 'configuration': { 'sword_version': '2' } } } def parse_deposit_config_file(base_filename=None, config_filename=None, additional_configs=None, global_config=True): return TEST_CONFIG -TEST_LOADER_CONFIG = { - 'extraction_dir': '/tmp/swh-loader-tar/test/', - 'working_dir': '/tmp/swh-loader-tar/test/working-dir', - 'debug': False, - 'storage': { - 'cls': 'memory', - 'args': { - } - }, - 'send_contents': True, - 'send_directories': True, - 'send_revisions': True, - 'send_releases': True, - 'send_snapshot': True, - - 'content_size_limit': 100 * 1024 * 1024, - 'content_packet_size': 10, - 'content_packet_size_bytes': 100 * 1024 * 1024, - 'directory_packet_size': 10, - 'revision_packet_size': 10, - 'release_packet_size': 10, -} - - -def parse_loader_config_file(base_filename=None, config_filename=None, - additional_configs=None, global_config=True): - return TEST_LOADER_CONFIG - - # monkey patch classes method permits to override, for tests purposes, # the default configuration without side-effect, i.e do not load the # configuration from 
disk SWHDefaultConfig.parse_config_file = parse_deposit_config_file # type: ignore -BufferedLoader.parse_config_file = parse_loader_config_file # type: ignore + setup_django_for('testing') diff --git a/swh/deposit/tests/api/conftest.py b/swh/deposit/tests/api/conftest.py index 20819c52..b348b291 100644 --- a/swh/deposit/tests/api/conftest.py +++ b/swh/deposit/tests/api/conftest.py @@ -1,108 +1,84 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib import pytest from django.urls import reverse -from os import path, listdir -from typing import Mapping from swh.deposit.config import ( DEPOSIT_STATUS_DEPOSITED, COL_IRI, DEPOSIT_STATUS_VERIFIED ) from swh.deposit.models import Deposit from swh.deposit.parsers import parse_xml from swh.deposit.api.private.deposit_check import SWHChecksDeposit -@pytest.fixture -def atom_dataset(datadir) -> Mapping[str, bytes]: - """Compute the paths to atom files. - - Returns: - Dict of atom name per content (bytes) - - """ - atom_path = path.join(datadir, 'atom') - data = {} - for filename in listdir(atom_path): - filepath = path.join(atom_path, filename) - with open(filepath, 'rb') as f: - raw_content = f.read() - - # Keep the filename without extension - atom_name = filename.split('.')[0] - data[atom_name] = raw_content - - return data - - @pytest.fixture def ready_deposit_ok(partial_deposit_with_metadata): """Returns a deposit ready for checks (it will pass the checks). """ deposit = partial_deposit_with_metadata deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return deposit @pytest.fixture def ready_deposit_verified(partial_deposit_with_metadata): """Returns a deposit ready for checks (it will pass the checks). 
""" deposit = partial_deposit_with_metadata deposit.status = DEPOSIT_STATUS_VERIFIED deposit.save() return deposit @pytest.fixture def ready_deposit_only_metadata(partial_deposit_only_metadata): """Deposit in status ready that will fail the checks (because missing archive). """ deposit = partial_deposit_only_metadata deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return deposit @pytest.fixture def ready_deposit_invalid_archive(authenticated_client, deposit_collection): url = reverse(COL_IRI, args=[deposit_collection.name]) data = b'some data which is clearly not a zip file' md5sum = hashlib.md5(data).hexdigest() # when response = authenticated_client.post( url, content_type='application/zip', # as zip data=data, # + headers CONTENT_LENGTH=len(data), # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG='external-id-invalid', HTTP_CONTENT_MD5=md5sum, HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') response_content = parse_xml(response.content) deposit_id = int(response_content['deposit_id']) deposit = Deposit.objects.get(pk=deposit_id) deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return deposit @pytest.fixture def swh_checks_deposit(): return SWHChecksDeposit() diff --git a/swh/deposit/tests/cli/data/atom/codemeta-sample.xml b/swh/deposit/tests/cli/data/atom/codemeta-sample.xml new file mode 100644 index 00000000..d804eff5 --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/codemeta-sample.xml @@ -0,0 +1,51 @@ + + + %s + hal-01587361 + https://hal.inria.fr/hal-01587361 + https://hal.inria.fr/hal-01587361/document + https://hal.inria.fr/hal-01587361/file/AffectationRO-v1.0.0.zip + doi:10.5281/zenodo.438684 + The assignment problem + AffectationRO + Gruenpeter, Morane + [INFO] Computer Science [cs] + [INFO.INFO-RO] Computer Science [cs]/Operations Research [cs.RO] + SOFTWARE + Project in OR: The assignment problemA java implementation for the assignment 
problem first release + description fr + 2015-06-01 + 2017-10-19 + en + + + url stable + Version sur hal + Version entre par lutilisateur + Mots-cls + Commentaire + Rfrence interne + + Collaboration/Projet + nom du projet + id + + Voir aussi + Financement + Projet ANR + Projet Europen + Platform/OS + Dpendances + Etat du dveloppement + + license + url spdx + + Outils de dveloppement- outil no1 + Outils de dveloppement- outil no2 + http://code.com + language 1 + language 2 + diff --git a/swh/deposit/tests/cli/data/atom/entry-data-badly-formatted.xml b/swh/deposit/tests/cli/data/atom/entry-data-badly-formatted.xml new file mode 100644 index 00000000..25a417fb --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/entry-data-badly-formatted.xml @@ -0,0 +1,2 @@ + + diff --git a/swh/deposit/tests/cli/data/atom/entry-data-deposit-binary.xml b/swh/deposit/tests/cli/data/atom/entry-data-deposit-binary.xml new file mode 100644 index 00000000..65b7f63b --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/entry-data-deposit-binary.xml @@ -0,0 +1,29 @@ + + + Title + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2005-10-07T17:17:08Z + Contributor + The abstract + + + The abstract + Access Rights + Alternative Title + Date Available + Bibliographic Citation # noqa + Contributor + Description + Has Part + Has Version + Identifier + Is Part Of + Publisher + References + Rights Holder + Source + Title + Type + + diff --git a/swh/deposit/tests/cli/data/atom/entry-data-empty-body.xml b/swh/deposit/tests/cli/data/atom/entry-data-empty-body.xml new file mode 100644 index 00000000..e4caf44f --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/entry-data-empty-body.xml @@ -0,0 +1,2 @@ + + diff --git a/swh/deposit/tests/cli/data/atom/entry-data-ko.xml b/swh/deposit/tests/cli/data/atom/entry-data-ko.xml new file mode 100644 index 00000000..3f5d8802 --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/entry-data-ko.xml @@ -0,0 +1,6 @@ + + + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + diff 
--git a/swh/deposit/tests/cli/data/atom/entry-data-minimal.xml b/swh/deposit/tests/cli/data/atom/entry-data-minimal.xml new file mode 100644 index 00000000..9432ac0e --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/entry-data-minimal.xml @@ -0,0 +1,4 @@ + + + %s + diff --git a/swh/deposit/tests/cli/data/atom/entry-data-parsing-error-prone.xml b/swh/deposit/tests/cli/data/atom/entry-data-parsing-error-prone.xml new file mode 100644 index 00000000..34710195 --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/entry-data-parsing-error-prone.xml @@ -0,0 +1,5 @@ + + + Composing a Web of Audio Applications + + diff --git a/swh/deposit/tests/cli/data/atom/entry-data0.xml b/swh/deposit/tests/cli/data/atom/entry-data0.xml new file mode 100644 index 00000000..2b0ccc00 --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/entry-data0.xml @@ -0,0 +1,26 @@ + + + Awesome Compiler + hal + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + %s + 2017-10-07T15:17:08Z + some awesome author + something + awesome-compiler + This is an awesome compiler destined to +awesomely compile stuff +and other stuff + compiler,programming,language + 2005-10-07T17:17:08Z + 2005-10-07T17:17:08Z + release note + related link + + Awesome + https://hoster.org/awesome-compiler + GNU/Linux + 0.0.1 + running + all + diff --git a/swh/deposit/tests/cli/data/atom/entry-data1.xml b/swh/deposit/tests/cli/data/atom/entry-data1.xml new file mode 100644 index 00000000..e4f415c7 --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/entry-data1.xml @@ -0,0 +1,24 @@ + + + hal + urn:uuid:2225c695-cfb8-4ebb-aaaa-80da344efa6a + 2017-10-07T15:17:08Z + some awesome author + something + awesome-compiler + This is an awesome compiler destined to +awesomely compile stuff +and other stuff + compiler,programming,language + 2005-10-07T17:17:08Z + 2005-10-07T17:17:08Z + release note + related link + + Awesome + https://hoster.org/awesome-compiler + GNU/Linux + 0.0.1 + running + all + diff --git 
a/swh/deposit/tests/cli/data/atom/entry-data2.xml b/swh/deposit/tests/cli/data/atom/entry-data2.xml new file mode 100644 index 00000000..73cfafeb --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/entry-data2.xml @@ -0,0 +1,6 @@ + + + some-external-id + https://hal-test.archives-ouvertes.fr/some-external-id + some awesome author + diff --git a/swh/deposit/tests/cli/data/atom/entry-data3.xml b/swh/deposit/tests/cli/data/atom/entry-data3.xml new file mode 100644 index 00000000..c75d9739 --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/entry-data3.xml @@ -0,0 +1,6 @@ + + + another one + no one + 2017-10-07T15:17:08Z + diff --git a/swh/deposit/tests/cli/data/atom/entry-update-in-place.xml b/swh/deposit/tests/cli/data/atom/entry-update-in-place.xml new file mode 100644 index 00000000..1a7d7bbb --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/entry-update-in-place.xml @@ -0,0 +1,7 @@ + + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa7b + Title + Type + diff --git a/swh/deposit/tests/cli/data/atom/error-with-decimal.xml b/swh/deposit/tests/cli/data/atom/error-with-decimal.xml new file mode 100644 index 00000000..be002442 --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/error-with-decimal.xml @@ -0,0 +1,38 @@ + + + Composing a Web of Audio Applications + hal + hal-01243065 + hal-01243065 + https://hal-test.archives-ouvertes.fr/hal-01243065 + test + + + DSP programming,Web,Composability,Faust + 2017-05-03T16:08:47+02:00 + The Web offers a great opportunity to share, deploy and use programs without installation difficulties. In this article we explore the idea of freely combining/composing real-time audio applications deployed on the Web using Faust audio DSP language. 
+ 1 + 10.4 + phpstorm + stable + + linux + php + python + C + + GNU General Public License v3.0 only + + + CeCILL Free Software License Agreement v1.1 + + + HAL + hal@ccsd.cnrs.fr + + + Someone Nice + someone@nice.fr + FFJ + + diff --git a/swh/deposit/tests/cli/data/atom/metadata.xml b/swh/deposit/tests/cli/data/atom/metadata.xml new file mode 100644 index 00000000..65f58543 --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/metadata.xml @@ -0,0 +1,32 @@ + + + Composing a Web of Audio Applications + hal + hal-01243065 + hal-01243065 + https://hal-test.archives-ouvertes.fr/hal-01243065 + test + DSP programming + this is the description + 1 + phpstorm + stable + php + python + C + + GNU General Public License v3.0 only + + + CeCILL Free Software License Agreement v1.1 + + + HAL + hal@ccsd.cnrs.fr + + + Morane Gruenpeter + +%s + diff --git a/swh/deposit/tests/cli/data/atom/tei-sample.xml b/swh/deposit/tests/cli/data/atom/tei-sample.xml new file mode 100644 index 00000000..cf2266af --- /dev/null +++ b/swh/deposit/tests/cli/data/atom/tei-sample.xml @@ -0,0 +1 @@ +HAL TEI export of hal-01587083CCSDDistributed under a Creative Commons Attribution 4.0 International License

HAL API platform

questionnaire software metadataMoraneGruenpeter7de56c632362954fa84172cad80afe4einria.fr1556733MoraneGruenpeterf85a43a5fb4a2e0778a77e017f28c8fdgmail.com2017-09-29 11:21:322017-10-03 17:20:132017-10-03 17:20:132017-09-292017-09-29contributorMoraneGruenpeterf85a43a5fb4a2e0778a77e017f28c8fdgmail.comCCSDhal-01587083https://hal.inria.fr/hal-01587083gruenpeter:hal-0158708320172017questionnaire software metadataMoraneGruenpeter7de56c632362954fa84172cad80afe4einria.fr1556733EnglishComputer Science [cs]SoftwareIRILLInitiative pour la Recherche et l'Innovation sur le Logiciel Libre
https://www.irill.org/
Universite Pierre et Marie Curie - Paris 6UPMC
4 place Jussieu - 75005 Paris
http://www.upmc.fr/
Institut National de Recherche en Informatique et en AutomatiqueInria
Domaine de VoluceauRocquencourt - BP 10578153 Le Chesnay Cedex
http://www.inria.fr/en/
Universite Paris Diderot - Paris 7UPD7
5 rue Thomas-Mann - 75205 Paris cedex 13
http://www.univ-paris-diderot.fr
diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py index 066f63d9..656ebf69 100644 --- a/swh/deposit/tests/cli/test_client.py +++ b/swh/deposit/tests/cli/test_client.py @@ -1,49 +1,323 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import pytest - +import contextlib +import logging +import os +import re from unittest.mock import MagicMock +from click.testing import CliRunner +import pytest + from swh.deposit.client import PublicApiDepositClient -from swh.deposit.cli.client import _url, _client, _collection, InputError +from swh.deposit.cli.client import ( + generate_slug, _url, _client, _collection, InputError) +from swh.deposit.cli import deposit as cli +from ..conftest import TEST_USER + + +EXAMPLE_SERVICE_DOCUMENT = { + 'service': { + 'workspace': { + 'collection': { + 'sword:name': 'softcol', + } + } + } +} + + +@pytest.fixture +def slug(): + return generate_slug() + + +@pytest.fixture +def client_mock(mocker, slug): + mocker.patch('swh.deposit.cli.client.generate_slug', return_value=slug) + mock_client = MagicMock() + mocker.patch( + 'swh.deposit.cli.client._client', + return_value=mock_client) + mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT + mock_client.deposit_create.return_value = '{"foo": "bar"}' + return mock_client def test_url(): assert _url('http://deposit') == 'http://deposit/1' assert _url('https://other/1') == 'https://other/1' def test_client(): client = _client('http://deposit', 'user', 'pass') assert isinstance(client, PublicApiDepositClient) def test_collection_error(): mock_client = MagicMock() mock_client.service_document.return_value = { 'error': 'something went wrong' } with pytest.raises(InputError) as e: _collection(mock_client) assert 'Service document retrieval: something went 
wrong' == str(e.value) def test_collection_ok(): mock_client = MagicMock() - mock_client.service_document.return_value = { - 'service': { - 'workspace': { - 'collection': { - 'sword:name': 'softcol', - } - } - } - } + mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT collection_name = _collection(mock_client) assert collection_name == 'softcol' + + +def test_single_minimal_deposit( + sample_archive, mocker, caplog, client_mock, slug, tmp_path): + """ from: + https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit + """ # noqa + + metadata_path = os.path.join(tmp_path, 'metadata.xml') + mocker.patch('swh.deposit.cli.client.tempfile.TemporaryDirectory', + return_value=contextlib.nullcontext(str(tmp_path))) + + runner = CliRunner() + result = runner.invoke(cli, [ + 'upload', + '--url', 'mock://deposit.swh/1', + '--username', TEST_USER['username'], + '--password', TEST_USER['password'], + '--name', 'test-project', + '--archive', sample_archive['path'], + '--author', 'Jane Doe', + ]) + + assert result.exit_code == 0, result.output + assert result.output == '' + assert caplog.record_tuples == [ + ('swh.deposit.cli.client', logging.INFO, '{"foo": "bar"}'), + ] + + client_mock.deposit_create.assert_called_once_with( + archive=sample_archive['path'], + collection='softcol', in_progress=False, metadata=metadata_path, + slug=slug) + + with open(metadata_path) as fd: + assert fd.read() == f'''\ + + +\ttest-project +\t{slug} +\t +\t\tJane Doe +\t +''' + + +def test_metadata_validation(sample_archive, mocker, caplog, tmp_path): + """ from: + https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit + """ # noqa + slug = generate_slug() + mocker.patch('swh.deposit.cli.client.generate_slug', return_value=slug) + mock_client = MagicMock() + mocker.patch( + 'swh.deposit.cli.client._client', + return_value=mock_client) + mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT + 
mock_client.deposit_create.return_value = '{"foo": "bar"}' + + metadata_path = os.path.join(tmp_path, 'metadata.xml') + mocker.patch('swh.deposit.cli.client.tempfile.TemporaryDirectory', + return_value=contextlib.nullcontext(str(tmp_path))) + with open(metadata_path, 'a'): + pass # creates the file + + runner = CliRunner() + + # Test missing author + result = runner.invoke(cli, [ + 'upload', + '--url', 'mock://deposit.swh/1', + '--username', TEST_USER['username'], + '--password', TEST_USER['password'], + '--name', 'test-project', + '--archive', sample_archive['path'], + ]) + + assert result.exit_code == 1, result.output + assert result.output == '' + assert len(caplog.record_tuples) == 1 + (_logger, level, message) = caplog.record_tuples[0] + assert level == logging.ERROR + assert ' --author ' in message + + # Clear mocking state + caplog.clear() + mock_client.reset_mock() + + # Test missing name + result = runner.invoke(cli, [ + 'upload', + '--url', 'mock://deposit.swh/1', + '--username', TEST_USER['username'], + '--password', TEST_USER['password'], + '--archive', sample_archive['path'], + '--author', 'Jane Doe', + ]) + + assert result.exit_code == 1, result.output + assert result.output == '' + assert len(caplog.record_tuples) == 1 + (_logger, level, message) = caplog.record_tuples[0] + assert level == logging.ERROR + assert ' --name ' in message + + # Clear mocking state + caplog.clear() + mock_client.reset_mock() + + # Test both --metadata and --author + result = runner.invoke(cli, [ + 'upload', + '--url', 'mock://deposit.swh/1', + '--username', TEST_USER['username'], + '--password', TEST_USER['password'], + '--archive', sample_archive['path'], + '--metadata', metadata_path, + '--author', 'Jane Doe', + ]) + + assert result.exit_code == 1, result.output + assert result.output == '' + assert len(caplog.record_tuples) == 1 + (_logger, level, message) = caplog.record_tuples[0] + assert level == logging.ERROR + assert re.search('--metadata.*is incompatible with', 
message) + + # Clear mocking state + caplog.clear() + mock_client.reset_mock() + + +def test_single_deposit_slug_generation( + sample_archive, mocker, caplog, tmp_path, client_mock): + """ from: + https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit + """ # noqa + slug = 'my-slug' + collection = 'my-collection' + + metadata_path = os.path.join(tmp_path, 'metadata.xml') + mocker.patch('swh.deposit.cli.client.tempfile.TemporaryDirectory', + return_value=contextlib.nullcontext(str(tmp_path))) + + runner = CliRunner() + result = runner.invoke(cli, [ + 'upload', + '--url', 'mock://deposit.swh/1', + '--username', TEST_USER['username'], + '--password', TEST_USER['password'], + '--name', 'test-project', + '--archive', sample_archive['path'], + '--slug', slug, + '--collection', collection, + '--author', 'Jane Doe', + ]) + + assert result.exit_code == 0, result.output + assert result.output == '' + assert caplog.record_tuples == [ + ('swh.deposit.cli.client', logging.INFO, '{"foo": "bar"}'), + ] + + client_mock.deposit_create.assert_called_once_with( + archive=sample_archive['path'], + collection=collection, in_progress=False, metadata=metadata_path, + slug=slug) + + with open(metadata_path) as fd: + assert fd.read() == '''\ + + +\ttest-project +\tmy-slug +\t +\t\tJane Doe +\t +''' + + +def test_multisteps_deposit( + sample_archive, atom_dataset, mocker, caplog, datadir, + client_mock, slug): + """ from: + https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#multisteps-deposit + """ # noqa + slug = generate_slug() + mocker.patch('swh.deposit.cli.client.generate_slug', return_value=slug) + + # https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#create-an-incomplete-deposit + client_mock.deposit_create.return_value = '{"deposit_id": "42"}' + + runner = CliRunner() + result = runner.invoke(cli, [ + 'upload', + '--url', 'mock://deposit.swh/1', + '--username', TEST_USER['username'], + '--password', 
TEST_USER['password'], + '--archive', sample_archive['path'], + '--partial', + ]) + + assert result.exit_code == 0, result.output + assert result.output == '' + assert caplog.record_tuples == [ + ('swh.deposit.cli.client', logging.INFO, '{"deposit_id": "42"}'), + ] + + client_mock.deposit_create.assert_called_once_with( + archive=sample_archive['path'], + collection='softcol', in_progress=True, metadata=None, + slug=slug) + + # Clear mocking state + caplog.clear() + client_mock.reset_mock() + + # https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#add-content-or-metadata-to-the-deposit + + metadata_path = os.path.join( + datadir, 'atom', 'entry-data-deposit-binary.xml') + + result = runner.invoke(cli, [ + 'upload', + '--url', 'mock://deposit.swh/1', + '--username', TEST_USER['username'], + '--password', TEST_USER['password'], + '--metadata', metadata_path, + ]) + + assert result.exit_code == 0, result.output + assert result.output == '' + assert caplog.record_tuples == [ + ('swh.deposit.cli.client', logging.INFO, '{"deposit_id": "42"}'), + ] + + client_mock.deposit_create.assert_called_once_with( + archive=None, + collection='softcol', in_progress=False, metadata=metadata_path, + slug=slug) + + # Clear mocking state + caplog.clear() + client_mock.reset_mock() diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py index 725bc20a..e95fad54 100644 --- a/swh/deposit/tests/conftest.py +++ b/swh/deposit/tests/conftest.py @@ -1,316 +1,338 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import base64 import pytest import psycopg2 from django.urls import reverse from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT from rest_framework import status from rest_framework.test import APIClient from typing import Mapping from 
swh.scheduler.tests.conftest import * # noqa from swh.deposit.parsers import parse_xml from swh.deposit.config import ( COL_IRI, EDIT_SE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_LOAD_FAILURE ) from swh.deposit.tests.common import create_arborescence_archive TEST_USER = { 'username': 'test', 'password': 'password', 'email': 'test@example.org', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'domain': 'archives-ouvertes.fr/', 'collection': { 'name': 'test' }, } def execute_sql(sql): """Execute sql to postgres db""" with psycopg2.connect(database='postgres') as conn: conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) cur = conn.cursor() cur.execute(sql) @pytest.hookimpl(tryfirst=True) def pytest_load_initial_conftests(early_config, parser, args): """This hook is done prior to django loading. Used to initialize the deposit's server db. """ import project.app.signals # type: ignore def prepare_db(*args, **kwargs): from django.conf import settings db_name = 'tests' # work around db settings for django for k, v in [ ('ENGINE', 'django.db.backends.postgresql'), ('NAME', 'tests'), ('USER', postgresql_proc.user), # noqa ('HOST', postgresql_proc.host), # noqa ('PORT', postgresql_proc.port), # noqa ]: settings.DATABASES['default'][k] = v execute_sql('DROP DATABASE IF EXISTS %s' % db_name) execute_sql('CREATE DATABASE %s TEMPLATE template0' % db_name) project.app.signals.something = prepare_db @pytest.fixture(autouse=True, scope='session') def swh_proxy(): """Automatically inject this fixture in all tests to ensure no outside connection takes place. 
""" os.environ['http_proxy'] = 'http://localhost:999' os.environ['https_proxy'] = 'http://localhost:999' def create_deposit_collection(collection_name: str): """Create a deposit collection with name collection_name """ from swh.deposit.models import DepositCollection try: collection = DepositCollection._default_manager.get( name=collection_name) except DepositCollection.DoesNotExist: collection = DepositCollection(name=collection_name) collection.save() return collection def deposit_collection_factory( collection_name=TEST_USER['collection']['name']): @pytest.fixture def _deposit_collection(db, collection_name=collection_name): return create_deposit_collection(collection_name) return _deposit_collection deposit_collection = deposit_collection_factory() deposit_another_collection = deposit_collection_factory('another-collection') @pytest.fixture def deposit_user(db, deposit_collection): """Create/Return the test_user "test" """ from swh.deposit.models import DepositClient try: user = DepositClient._default_manager.get( username=TEST_USER['username']) except DepositClient.DoesNotExist: user = DepositClient._default_manager.create_user( username=TEST_USER['username'], email=TEST_USER['email'], password=TEST_USER['password'], provider_url=TEST_USER['provider_url'], domain=TEST_USER['domain'], ) user.collections = [deposit_collection.id] user.save() return user @pytest.fixture def client(): """Override pytest-django one which does not work for djangorestframework. 
""" return APIClient() # <- drf's client @pytest.yield_fixture def authenticated_client(client, deposit_user): """Returned a logged client """ _token = '%s:%s' % (deposit_user.username, TEST_USER['password']) token = base64.b64encode(_token.encode('utf-8')) authorization = 'Basic %s' % token.decode('utf-8') client.credentials(HTTP_AUTHORIZATION=authorization) yield client client.logout() @pytest.fixture def sample_archive(tmp_path): """Returns a sample archive """ tmp_path = str(tmp_path) # pytest version limitation in previous version archive = create_arborescence_archive( tmp_path, 'archive1', 'file1', b'some content in file') return archive +@pytest.fixture +def atom_dataset(datadir) -> Mapping[str, bytes]: + """Compute the paths to atom files. + + Returns: + Dict of atom name per content (bytes) + + """ + atom_path = os.path.join(datadir, 'atom') + data = {} + for filename in os.listdir(atom_path): + filepath = os.path.join(atom_path, filename) + with open(filepath, 'rb') as f: + raw_content = f.read() + + # Keep the filename without extension + atom_name = filename.split('.')[0] + data[atom_name] = raw_content + + return data + + def create_deposit( authenticated_client, collection_name: str, sample_archive, external_id: str, deposit_status=DEPOSIT_STATUS_DEPOSITED): """Create a skeleton shell deposit """ url = reverse(COL_IRI, args=[collection_name]) # when response = authenticated_client.post( url, content_type='application/zip', # as zip data=sample_archive['data'], # + headers CONTENT_LENGTH=sample_archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=sample_archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( sample_archive['name'])) # then assert response.status_code == status.HTTP_201_CREATED from swh.deposit.models import Deposit deposit = Deposit._default_manager.get(external_id=external_id) if deposit.status != deposit_status: 
deposit.status = deposit_status deposit.save() assert deposit.status == deposit_status return deposit def create_binary_deposit( authenticated_client, collection_name: str, sample_archive, external_id: str, deposit_status: str = DEPOSIT_STATUS_DEPOSITED, atom_dataset: Mapping[str, bytes] = {}): """Create a deposit with both metadata and archive set. Then alters its status to `deposit_status`. """ deposit = create_deposit( authenticated_client, collection_name, sample_archive, external_id=external_id, deposit_status=DEPOSIT_STATUS_PARTIAL) response = authenticated_client.post( reverse(EDIT_SE_IRI, args=[collection_name, deposit.id]), content_type='application/atom+xml;type=entry', data=atom_dataset['entry-data0'] % deposit.external_id.encode('utf-8'), HTTP_SLUG=deposit.external_id, HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED assert deposit.status == DEPOSIT_STATUS_PARTIAL from swh.deposit.models import Deposit deposit = Deposit._default_manager.get(pk=deposit.id) if deposit.status != deposit_status: deposit.status = deposit_status deposit.save() assert deposit.status == deposit_status return deposit def deposit_factory(deposit_status=DEPOSIT_STATUS_DEPOSITED): """Build deposit with a specific status """ @pytest.fixture() def _deposit(sample_archive, deposit_collection, authenticated_client, deposit_status=deposit_status): external_id = 'external-id-%s' % deposit_status return create_deposit( authenticated_client, deposit_collection.name, sample_archive, external_id=external_id, deposit_status=deposit_status ) return _deposit deposited_deposit = deposit_factory() rejected_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_REJECTED) partial_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_PARTIAL) verified_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_VERIFIED) completed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS) failed_deposit = 
deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_FAILURE) @pytest.fixture def partial_deposit_with_metadata( sample_archive, deposit_collection, authenticated_client, atom_dataset): """Returns deposit with archive and metadata provided, status 'partial' """ return create_binary_deposit( authenticated_client, deposit_collection.name, sample_archive, external_id='external-id-partial', deposit_status=DEPOSIT_STATUS_PARTIAL, atom_dataset=atom_dataset ) @pytest.fixture def partial_deposit_only_metadata( deposit_collection, authenticated_client, atom_dataset): response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), content_type='application/atom+xml;type=entry', data=atom_dataset['entry-data1'], HTTP_SLUG='external-id-partial', HTTP_IN_PROGRESS=True) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(response.content) deposit_id = response_content['deposit_id'] from swh.deposit.models import Deposit deposit = Deposit._default_manager.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_PARTIAL return deposit @pytest.fixture def complete_deposit(sample_archive, deposit_collection, authenticated_client): """Returns a completed deposit (load success) """ deposit = create_deposit( authenticated_client, deposit_collection.name, sample_archive, external_id='external-id-complete', deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS ) _swh_id_context = 'https://hal.archives-ouvertes.fr/hal-01727745' deposit.swh_id = 'swh:1:dir:42a13fc721c8716ff695d0d62fc851d641f3a12b' deposit.swh_id_context = '%s;%s' % ( deposit.swh_id, _swh_id_context) deposit.swh_anchor_id = \ 'swh:rev:1:548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10' deposit.swh_anchor_id_context = '%s;%s' % ( deposit.swh_anchor_id, _swh_id_context) deposit.save() return deposit @pytest.fixture() def tmp_path(tmp_path): return str(tmp_path) # issue with oldstable's pytest version diff --git a/swh/deposit/tests/loader/conftest.py b/swh/deposit/tests/loader/conftest.py 
index cd5081f0..addb2eb5 100644 --- a/swh/deposit/tests/loader/conftest.py +++ b/swh/deposit/tests/loader/conftest.py @@ -1,67 +1,61 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import re import os import pytest import yaml from functools import partial from swh.core.pytest_plugin import get_response_cb from swh.scheduler.tests.conftest import * # noqa from swh.storage.tests.conftest import * # noqa from swh.deposit.loader.checker import DepositChecker -from swh.deposit.loader.loader import DepositLoader @pytest.fixture(scope='session') # type: ignore # expected redefinition def celery_includes(): return [ 'swh.deposit.loader.tasks', ] @pytest.fixture def swh_config(tmp_path, swh_storage_postgresql, monkeypatch): storage_config = { 'url': 'https://deposit.softwareheritage.org/', 'storage': { 'cls': 'local', 'args': { 'db': swh_storage_postgresql.dsn, 'objstorage': { 'cls': 'memory', 'args': {} }, }, }, } conffile = os.path.join(tmp_path, 'deposit.yml') with open(conffile, 'w') as f: f.write(yaml.dump(storage_config)) monkeypatch.setenv('SWH_CONFIG_FILENAME', conffile) return conffile @pytest.fixture def deposit_checker(swh_config): return DepositChecker() -@pytest.fixture -def deposit_loader(swh_config): - return DepositLoader() - - @pytest.fixture def requests_mock_datadir(datadir, requests_mock_datadir): """Override default behavior to deal with put method """ cb = partial(get_response_cb, datadir=datadir) requests_mock_datadir.put(re.compile('https://'), body=cb) return requests_mock_datadir diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py deleted file mode 100644 index 3c6924df..00000000 --- a/swh/deposit/tests/loader/test_loader.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (C) 2017-2019 The Software Heritage 
developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from swh.deposit.config import ( - PRIVATE_GET_RAW_CONTENT, PRIVATE_GET_DEPOSIT_METADATA, PRIVATE_PUT_DEPOSIT -) -from django.urls import reverse -from swh.model.hashutil import hash_to_bytes - -from .common import get_stats, check_snapshot - - -def test_inject_deposit_ready( - swh_config, requests_mock_datadir, datadir, deposit_loader): - """Load a deposit which is ready - - """ - args = ['test', 999] - archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) - deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) - deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) - - # when - res = deposit_loader.load( - archive_url=archive_url, - deposit_meta_url=deposit_meta_url, - deposit_update_url=deposit_update_url) - - # then - assert res['status'] == 'eventful' - stats = get_stats(deposit_loader.storage) - - assert { - 'content': 303, - 'skipped_content': 0, - 'directory': 12, - 'origin': 1, - 'origin_visit': 1, - 'person': 1, - 'release': 0, - 'revision': 1, - 'snapshot': 1, - } == stats - - origin_url = 'https://hal-test.archives-ouvertes.fr/some-external-id' - rev_id = 'b1bef04d90ef3ba645df4c4f945748c173a4e9a2' - dir_id = 'bed9acbf2a4502499f659e65a2ab77096bd46a1d' - - expected_revision = { - 'author': { - 'name': b'Software Heritage', - 'fullname': b'Software Heritage', - 'email': b'robot@softwareheritage.org'}, - 'committer': { - 'name': b'Software Heritage', - 'fullname': b'Software Heritage', - 'email': b'robot@softwareheritage.org'}, - 'committer_date': { - 'negative_utc': 'false', - 'offset': 0, - 'timestamp': {'microseconds': 0, 'seconds': 1507389428}}, - 'date': { - 'negative_utc': 'false', - 'offset': 0, - 'timestamp': {'microseconds': 0, 'seconds': 1507389428}}, - 'message': b'test: Deposit 999 in collection test', - 'metadata': 
{ - '@xmlns': ['http://www.w3.org/2005/Atom'], - 'author': ['some awesome author', 'another one', 'no one'], - 'codemeta:dateCreated': '2017-10-07T15:17:08Z', - 'external_identifier': 'some-external-id', - 'url': origin_url, - 'original_artifact': [ - { - 'name': 'archive.zip', - 'archive_type': 'tar', - 'length': 725946, - 'blake2s256': '04fffd328441d216c92492ad72d37388d8c77889880b069151298786fd48d889', # noqa - 'sha256': '31e066137a962676e89f69d1b65382de95a7ef7d914b8cb956f41ea72e0f516b', # noqa - 'sha1': 'f7bebf6f9c62a2295e889f66e05ce9bfaed9ace3', - 'sha1_git': 'cae6b33cc33faafd2d6bd86c6b4273f9338c69c2' - } - ] - }, - 'synthetic': True, - 'type': 'tar', - 'parents': [], - 'directory': hash_to_bytes(dir_id), - 'id': hash_to_bytes(rev_id), - } - - rev = next(deposit_loader.storage.revision_get([hash_to_bytes(rev_id)])) - assert rev is not None - assert expected_revision == rev - - expected_snapshot = { - 'id': '823109c16f9948c6f88cc5dec8e278da1487f06d', - 'branches': { - 'master': { - 'target': rev_id, - 'target_type': 'revision' - } - } - } - - check_snapshot(expected_snapshot, deposit_loader.storage) diff --git a/swh/deposit/tests/loader/test_tasks.py b/swh/deposit/tests/loader/test_tasks.py index c522d2fc..9c9516ba 100644 --- a/swh/deposit/tests/loader/test_tasks.py +++ b/swh/deposit/tests/loader/test_tasks.py @@ -1,39 +1,21 @@ -# Copyright (C) 2018 The Software Heritage developers +# Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from unittest.mock import patch -@patch('swh.deposit.loader.loader.DepositLoader.load') -def deposit_load(loader, swh_config, swh_app, celery_session_worker): - loader.return_value = {'status': 'eventful'} - - res = swh_app.send_task( - 'swh.deposit.loader.tasks.LoadDepositArchiveTsk', - args=('archive_url', 'deposit_meta_url', 
'deposit_update_url')) - assert res - res.wait() - assert res.successful() - - assert res.result == {'status': 'eventful'} - loader.assert_called_once_with( - archive_url='archive_url', - deposit_meta_url='deposit_meta_url', - deposit_update_url='deposit_update_url') - - @patch('swh.deposit.loader.checker.DepositChecker.check') def deposit_check(checker, swh_config, swh_app, celery_session_worker): checker.return_value = {'status': 'uneventful'} res = swh_app.send_task( 'swh.deposit.loader.tasks.ChecksDepositTsk', args=['check_deposit_url']) assert res res.wait() assert res.successful() assert res.result == {'status': 'uneventful'} checker.assert_called_once_with('check_deposit_url') diff --git a/swh/deposit/tests/test_gunicorn_config.py b/swh/deposit/tests/test_gunicorn_config.py new file mode 100644 index 00000000..87b07119 --- /dev/null +++ b/swh/deposit/tests/test_gunicorn_config.py @@ -0,0 +1,47 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import os +from unittest.mock import patch + +import swh.deposit.gunicorn_config as gunicorn_config + + +def test_post_fork_default(): + with patch('sentry_sdk.init') as sentry_sdk_init: + gunicorn_config.post_fork(None, None) + + sentry_sdk_init.assert_not_called() + + +def test_post_fork_with_dsn_env(): + django_integration = object() # unique object to check for equality + with patch('swh.deposit.gunicorn_config.DjangoIntegration', + new=lambda: django_integration): + with patch('sentry_sdk.init') as sentry_sdk_init: + with patch.dict(os.environ, {'SWH_SENTRY_DSN': 'test_dsn'}): + gunicorn_config.post_fork(None, None) + + sentry_sdk_init.assert_called_once_with( + dsn='test_dsn', + integrations=[django_integration], + debug=False, + ) + + +def test_post_fork_debug(): + django_integration = object() # unique object 
to check for equality + with patch('swh.deposit.gunicorn_config.DjangoIntegration', + new=lambda: django_integration): + with patch('sentry_sdk.init') as sentry_sdk_init: + with patch.dict(os.environ, {'SWH_SENTRY_DSN': 'test_dsn', + 'SWH_SENTRY_DEBUG': '1'}): + gunicorn_config.post_fork(None, None) + + sentry_sdk_init.assert_called_once_with( + dsn='test_dsn', + integrations=[django_integration], + debug=True, + ) diff --git a/swh/deposit/urls.py b/swh/deposit/urls.py index 355029b5..f0d26d13 100644 --- a/swh/deposit/urls.py +++ b/swh/deposit/urls.py @@ -1,30 +1,30 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -"""swhdeposit URL Configuration +"""SWH's main deposit URL Configuration """ from django.conf.urls import url, include from django.shortcuts import render from django.views.generic.base import RedirectView from rest_framework.urlpatterns import format_suffix_patterns favicon_view = RedirectView.as_view(url='/static/img/icons/swh-logo-32x32.png', permanent=True) def default_view(req): return render(req, "homepage.html") urlpatterns = [ url(r'^favicon\.ico$', favicon_view), url(r'^1/', include('swh.deposit.api.urls')), url(r'^1/private/', include('swh.deposit.api.private.urls')), url(r'^$', default_view, name='home'), ] urlpatterns = format_suffix_patterns(urlpatterns) diff --git a/version.txt b/version.txt index 4cada929..b0d69697 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.78-0-ga51bf9b \ No newline at end of file +v0.0.79-0-g76fc968 \ No newline at end of file