diff --git a/PKG-INFO b/PKG-INFO index 7e64cb3a..72ddb44d 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,35 +1,35 @@ Metadata-Version: 2.1 Name: swh.deposit -Version: 0.0.73 +Version: 0.0.74 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN -Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest +Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Project-URL: Funding, https://www.softwareheritage.org/donate Description: # swh-deposit This is [Software Heritage](https://www.softwareheritage.org)'s [SWORD 2.0](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) Server implementation, as well as a simple client to upload deposits on the server. **S.W.O.R.D** (**S**imple **W**eb-Service **O**ffering **R**epository **D**eposit) is an interoperability standard for digital file deposit. This implementation will permit interaction between a client (a repository) and a server (SWH repository) to permit deposits of software source code archives and associated metadata. The documentation is at ./docs/README-specification.md Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: server diff --git a/setup.py b/setup.py index a4921dc6..7d834de8 100755 --- a/setup.py +++ b/setup.py @@ -1,74 +1,76 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from setuptools import setup, find_packages from os import path from io import open here = path.abspath(path.dirname(__file__)) # Get the long description from the README file with open(path.join(here, 'README.md'), encoding='utf-8') as f: long_description = f.read() def parse_requirements(*names): requirements = [] for name in names: if name: reqf = 'requirements-%s.txt' % name else: reqf = 'requirements.txt' if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith('#'): continue requirements.append(line) return requirements setup( name='swh.deposit', description='Software Heritage Deposit Server', long_description=long_description, long_description_content_type='text/markdown', author='Software Heritage developers', author_email='swh-devel@inria.fr', url='https://forge.softwareheritage.org/source/swh-deposit/', packages=find_packages(), install_requires=parse_requirements(None, 'swh'), tests_require=parse_requirements('test'), setup_requires=['vcversioner'], extras_require={ 'testing': parse_requirements('test', 'server', 'swh-server'), 'server': parse_requirements('server', 'swh-server')}, vcversioner={}, include_package_data=True, entry_points=''' [console_scripts] swh-deposit=swh.deposit.cli:main [swh.cli.subcommands] deposit=swh.deposit.cli:deposit + [swh.workers] + deposit.worker=swh.deposit.loader:register ''', classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 5 - Production/Stable", ], project_urls={ 'Bug Reports': 'https://forge.softwareheritage.org/maniphest', 'Funding': 'https://www.softwareheritage.org/donate', 'Source': 'https://forge.softwareheritage.org/source/swh-deposit', }, ) diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO index 7e64cb3a..72ddb44d 100644 --- a/swh.deposit.egg-info/PKG-INFO +++ b/swh.deposit.egg-info/PKG-INFO @@ -1,35 +1,35 @@ Metadata-Version: 2.1 Name: swh.deposit -Version: 0.0.73 +Version: 0.0.74 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN -Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest +Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Project-URL: Funding, https://www.softwareheritage.org/donate Description: # swh-deposit This is [Software Heritage](https://www.softwareheritage.org)'s [SWORD 2.0](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) Server implementation, as well as a simple client to upload deposits on the server. **S.W.O.R.D** (**S**imple **W**eb-Service **O**ffering **R**epository **D**eposit) is an interoperability standard for digital file deposit. This implementation will permit interaction between a client (a repository) and a server (SWH repository) to permit deposits of software source code archives and associated metadata. The documentation is at ./docs/README-specification.md Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: server diff --git a/swh.deposit.egg-info/SOURCES.txt b/swh.deposit.egg-info/SOURCES.txt index b9f1b3bd..912c9948 100644 --- a/swh.deposit.egg-info/SOURCES.txt +++ b/swh.deposit.egg-info/SOURCES.txt @@ -1,125 +1,126 @@ MANIFEST.in Makefile README.md requirements-server.txt requirements-swh-server.txt requirements-swh.txt requirements-test.txt requirements.txt setup.py version.txt swh/__init__.py swh.deposit.egg-info/PKG-INFO swh.deposit.egg-info/SOURCES.txt swh.deposit.egg-info/dependency_links.txt swh.deposit.egg-info/entry_points.txt swh.deposit.egg-info/requires.txt swh.deposit.egg-info/top_level.txt swh/deposit/__init__.py swh/deposit/apps.py swh/deposit/auth.py swh/deposit/config.py swh/deposit/errors.py swh/deposit/manage.py swh/deposit/models.py swh/deposit/parsers.py swh/deposit/signals.py swh/deposit/urls.py swh/deposit/utils.py swh/deposit/wsgi.py swh/deposit/api/__init__.py swh/deposit/api/common.py swh/deposit/api/converters.py swh/deposit/api/deposit.py swh/deposit/api/deposit_content.py swh/deposit/api/deposit_status.py swh/deposit/api/deposit_update.py swh/deposit/api/service_document.py swh/deposit/api/urls.py swh/deposit/api/private/__init__.py swh/deposit/api/private/deposit_check.py swh/deposit/api/private/deposit_list.py swh/deposit/api/private/deposit_read.py swh/deposit/api/private/deposit_update_status.py swh/deposit/api/private/urls.py swh/deposit/cli/__init__.py swh/deposit/cli/admin.py swh/deposit/cli/client.py swh/deposit/client/__init__.py swh/deposit/fixtures/__init__.py swh/deposit/fixtures/deposit_data.yaml swh/deposit/loader/__init__.py swh/deposit/loader/checker.py swh/deposit/loader/loader.py swh/deposit/loader/tasks.py swh/deposit/migrations/0001_initial.py swh/deposit/migrations/0002_depositrequest_archive.py swh/deposit/migrations/0003_temporaryarchive.py swh/deposit/migrations/0004_delete_temporaryarchive.py swh/deposit/migrations/0005_auto_20171019_1436.py swh/deposit/migrations/0006_depositclient_url.py swh/deposit/migrations/0007_auto_20171129_1609.py swh/deposit/migrations/0008_auto_20171130_1513.py swh/deposit/migrations/0009_deposit_parent.py swh/deposit/migrations/0010_auto_20180110_0953.py swh/deposit/migrations/0011_auto_20180115_1510.py swh/deposit/migrations/0012_deposit_status_detail.py swh/deposit/migrations/0013_depositrequest_raw_metadata.py swh/deposit/migrations/0014_auto_20180720_1221.py swh/deposit/migrations/0015_depositrequest_typemigration.py swh/deposit/migrations/0016_auto_20190507_1408.py +swh/deposit/migrations/0017_auto_20190925_0906.py swh/deposit/migrations/__init__.py swh/deposit/settings/__init__.py swh/deposit/settings/common.py swh/deposit/settings/development.py swh/deposit/settings/production.py swh/deposit/settings/testing.py swh/deposit/static/robots.txt swh/deposit/static/css/bootstrap-responsive.min.css swh/deposit/static/css/style.css swh/deposit/static/img/arrow-up-small.png swh/deposit/static/img/swh-logo-deposit.png swh/deposit/static/img/swh-logo-deposit.svg swh/deposit/static/img/icons/swh-logo-32x32.png swh/deposit/static/img/icons/swh-logo-deposit-180x180.png swh/deposit/static/img/icons/swh-logo-deposit-192x192.png swh/deposit/static/img/icons/swh-logo-deposit-270x270.png swh/deposit/templates/__init__.py swh/deposit/templates/homepage.html swh/deposit/templates/layout.html swh/deposit/templates/deposit/__init__.py swh/deposit/templates/deposit/content.xml swh/deposit/templates/deposit/deposit_receipt.xml swh/deposit/templates/deposit/error.xml swh/deposit/templates/deposit/service_document.xml swh/deposit/templates/deposit/status.xml swh/deposit/templates/rest_framework/api.html swh/deposit/tests/__init__.py swh/deposit/tests/common.py swh/deposit/tests/test_utils.py swh/deposit/tests/api/__init__.py swh/deposit/tests/api/test_common.py swh/deposit/tests/api/test_converters.py swh/deposit/tests/api/test_deposit.py swh/deposit/tests/api/test_deposit_atom.py swh/deposit/tests/api/test_deposit_binary.py -swh/deposit/tests/api/test_deposit_check.py swh/deposit/tests/api/test_deposit_delete.py swh/deposit/tests/api/test_deposit_list.py swh/deposit/tests/api/test_deposit_multipart.py -swh/deposit/tests/api/test_deposit_read_archive.py -swh/deposit/tests/api/test_deposit_read_metadata.py +swh/deposit/tests/api/test_deposit_private_check.py +swh/deposit/tests/api/test_deposit_private_read_archive.py +swh/deposit/tests/api/test_deposit_private_read_metadata.py +swh/deposit/tests/api/test_deposit_private_update_status.py swh/deposit/tests/api/test_deposit_status.py swh/deposit/tests/api/test_deposit_update.py -swh/deposit/tests/api/test_deposit_update_status.py swh/deposit/tests/api/test_parser.py swh/deposit/tests/api/test_service_document.py swh/deposit/tests/cli/__init__.py swh/deposit/tests/cli/test_client.py swh/deposit/tests/loader/__init__.py swh/deposit/tests/loader/common.py swh/deposit/tests/loader/conftest.py swh/deposit/tests/loader/test_checker.py swh/deposit/tests/loader/test_client.py swh/deposit/tests/loader/test_loader.py swh/deposit/tests/loader/test_tasks.py \ No newline at end of file diff --git a/swh.deposit.egg-info/entry_points.txt b/swh.deposit.egg-info/entry_points.txt index 4304c868..dbdecaea 100644 --- a/swh.deposit.egg-info/entry_points.txt +++ b/swh.deposit.egg-info/entry_points.txt @@ -1,6 +1,8 @@ [console_scripts] swh-deposit=swh.deposit.cli:main [swh.cli.subcommands] deposit=swh.deposit.cli:deposit + [swh.workers] + deposit.worker=swh.deposit.loader:register \ No newline at end of file diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py index cb972894..c5cc631c 100644 --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,893 +1,884 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib from abc import ABCMeta, abstractmethod from django.urls import reverse from django.http import HttpResponse from django.shortcuts import render from django.utils import timezone from rest_framework import status from rest_framework.authentication import BasicAuthentication -from rest_framework.permissions import IsAuthenticated, AllowAny +from rest_framework.permissions import IsAuthenticated from rest_framework.views import APIView from swh.model import hashutil from ..config import ( SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI, ARCHIVE_KEY, METADATA_KEY, RAW_METADATA_KEY, STATE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS, ARCHIVE_TYPE, METADATA_TYPE ) from ..errors import ( MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT, CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED, make_error_response_from_dict, FORBIDDEN, NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED, ParserError, PARSING_ERROR ) from ..models import ( Deposit, DepositRequest, DepositCollection, DepositClient ) from ..parsers import parse_xml ACCEPT_PACKAGINGS = ['http://purl.org/net/sword/package/SimpleZip'] ACCEPT_ARCHIVE_CONTENT_TYPES = ['application/zip', 'application/x-tar'] class SWHAPIView(APIView): """Mixin intended as a based API view to enforce the basic authentication check """ authentication_classes = (BasicAuthentication, ) permission_classes = (IsAuthenticated, ) -class SWHPrivateAPIView(SWHAPIView): - """Mixin intended as private api (so no authentication) based API view - (for the private ones). - - """ - authentication_classes = () - permission_classes = (AllowAny, ) - - class SWHBaseDeposit(SWHDefaultConfig, SWHAPIView, metaclass=ABCMeta): """Base deposit request class sharing multiple common behaviors. """ def _read_headers(self, req): """Read and unify the necessary headers from the request (those are not stored in the same location or not properly formatted). Args: req (Request): Input request Returns: Dictionary with the following keys (some associated values may be None): - content-type - content-length - in-progress - content-disposition - packaging - slug - on-behalf-of """ meta = req._request.META content_type = req.content_type content_length = meta.get('CONTENT_LENGTH') if content_length and isinstance(content_length, str): content_length = int(content_length) # final deposit if not provided in_progress = meta.get('HTTP_IN_PROGRESS', False) content_disposition = meta.get('HTTP_CONTENT_DISPOSITION') if isinstance(in_progress, str): in_progress = in_progress.lower() == 'true' content_md5sum = meta.get('HTTP_CONTENT_MD5') if content_md5sum: content_md5sum = bytes.fromhex(content_md5sum) packaging = meta.get('HTTP_PACKAGING') slug = meta.get('HTTP_SLUG') on_behalf_of = meta.get('HTTP_ON_BEHALF_OF') metadata_relevant = meta.get('HTTP_METADATA_RELEVANT') return { 'content-type': content_type, 'content-length': content_length, 'in-progress': in_progress, 'content-disposition': content_disposition, 'content-md5sum': content_md5sum, 'packaging': packaging, 'slug': slug, 'on-behalf-of': on_behalf_of, 'metadata-relevant': metadata_relevant, } def _compute_md5(self, filehandler): """Compute uploaded file's md5 sum. Args: filehandler (InMemoryUploadedFile): the file to compute the md5 hash Returns: the md5 checksum (str) """ h = hashlib.md5() for chunk in filehandler: h.update(chunk) return h.digest() def _deposit_put(self, deposit_id=None, in_progress=False, external_id=None): """Save/Update a deposit in db. Args: deposit_id (int): deposit identifier in_progress (dict): The deposit's status external_id (str): The external identifier to associate to the deposit Returns: The Deposit instance saved or updated. """ if in_progress is False: complete_date = timezone.now() status_type = DEPOSIT_STATUS_DEPOSITED else: complete_date = None status_type = DEPOSIT_STATUS_PARTIAL if not deposit_id: try: # find a deposit parent (same external id, status load # to success) deposit_parent = Deposit.objects.filter( external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS).order_by('-id')[0:1].get() # noqa except Deposit.DoesNotExist: deposit_parent = None deposit = Deposit(collection=self._collection, external_id=external_id, complete_date=complete_date, status=status_type, client=self._client, parent=deposit_parent) else: deposit = Deposit.objects.get(pk=deposit_id) # update metadata deposit.complete_date = complete_date deposit.status = status_type deposit.save() return deposit def _deposit_request_put(self, deposit, deposit_request_data, replace_metadata=False, replace_archives=False): """Save a deposit request with metadata attached to a deposit. Args: deposit (Deposit): The deposit concerned by the request deposit_request_data (dict): The dictionary with at most 2 deposit request types (archive, metadata) to associate to the deposit replace_metadata (bool): Flag defining if we add or update existing metadata to the deposit replace_archives (bool): Flag defining if we add or update archives to existing deposit Returns: None """ if replace_metadata: DepositRequest.objects.filter( deposit=deposit, type=METADATA_TYPE).delete() if replace_archives: DepositRequest.objects.filter( deposit=deposit, type=ARCHIVE_TYPE).delete() deposit_request = None archive_file = deposit_request_data.get(ARCHIVE_KEY) if archive_file: deposit_request = DepositRequest( type=ARCHIVE_TYPE, deposit=deposit, archive=archive_file) deposit_request.save() metadata = deposit_request_data.get(METADATA_KEY) if metadata: raw_metadata = deposit_request_data.get(RAW_METADATA_KEY) deposit_request = DepositRequest( type=METADATA_TYPE, deposit=deposit, metadata=metadata, raw_metadata=raw_metadata) deposit_request.save() assert deposit_request is not None def _delete_archives(self, collection_name, deposit_id): """Delete archives reference from the deposit id. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'The deposit %s does not exist' % deposit_id) DepositRequest.objects.filter( deposit=deposit, type=ARCHIVE_TYPE).delete() return {} def _delete_deposit(self, collection_name, deposit_id): """Delete deposit reference. Args: collection_name (str): Client's name deposit_id (id): The deposit to delete Returns Empty dict when ok. Dict with error key to describe the failure. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'The deposit %s does not exist' % deposit_id) if deposit.collection.name != collection_name: summary = 'Cannot delete a deposit from another collection' description = "Deposit %s does not belong to the collection %s" % ( deposit_id, collection_name) return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description) DepositRequest.objects.filter(deposit=deposit).delete() deposit.delete() return {} def _check_preconditions_on(self, filehandler, md5sum, content_length=None): """Check preconditions on provided file are respected. That is the length and/or the md5sum hash match the file's content. Args: filehandler (InMemoryUploadedFile): The file to check md5sum (hex str): md5 hash expected from the file's content content_length (int): the expected length if provided. Returns: Either none if no error or a dictionary with a key error detailing the problem. """ if content_length: if content_length > self.config['max_upload_size']: return make_error_dict( MAX_UPLOAD_SIZE_EXCEEDED, 'Upload size limit exceeded (max %s bytes).' % self.config['max_upload_size'], 'Please consider sending the archive in ' 'multiple steps.') length = filehandler.size if length != content_length: return make_error_dict(status.HTTP_412_PRECONDITION_FAILED, 'Wrong length') if md5sum: _md5sum = self._compute_md5(filehandler) if _md5sum != md5sum: return make_error_dict( CHECKSUM_MISMATCH, 'Wrong md5 hash', 'The checksum sent %s and the actual checksum ' '%s does not match.' % (hashutil.hash_to_hex(md5sum), hashutil.hash_to_hex(_md5sum))) return None def _binary_upload(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Binary upload routine. Other than such a request, a 415 response is returned. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 413 (request entity too large) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided """ content_length = headers['content-length'] if not content_length: return make_error_dict( BAD_REQUEST, 'CONTENT_LENGTH header is mandatory', 'For archive deposit, the ' 'CONTENT_LENGTH header must be sent.') content_disposition = headers['content-disposition'] if not content_disposition: return make_error_dict( BAD_REQUEST, 'CONTENT_DISPOSITION header is mandatory', 'For archive deposit, the ' 'CONTENT_DISPOSITION header must be sent.') packaging = headers['packaging'] if packaging and packaging not in ACCEPT_PACKAGINGS: return make_error_dict( BAD_REQUEST, 'Only packaging %s is supported' % ACCEPT_PACKAGINGS, 'The packaging provided %s is not supported' % packaging) filehandler = req.FILES['file'] precondition_status_response = self._check_preconditions_on( filehandler, headers['content-md5sum'], content_length) if precondition_status_response: return precondition_status_response external_id = headers['slug'] # actual storage of data archive_metadata = filehandler deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) self._deposit_request_put( deposit, {ARCHIVE_KEY: archive_metadata}, replace_metadata=replace_metadata, replace_archives=replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'status': deposit.status, 'archive': filehandler.name, } def _read_metadata(self, metadata_stream): """Given a metadata stream, reads the metadata and returns both the parsed and the raw metadata. """ raw_metadata = metadata_stream.read() metadata = parse_xml(raw_metadata) return raw_metadata, metadata def _multipart_upload(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Multipart upload supported with exactly: - 1 archive (zip) - 1 atom entry Other than such a request, a 415 response is returned. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 413 (request entity too large) if the length of the archive exceeds the max size configured - 415 (unsupported media type) if a wrong media type is provided """ external_id = headers['slug'] content_types_present = set() data = { 'application/zip': None, # expected either zip 'application/x-tar': None, # or x-tar 'application/atom+xml': None, } for key, value in req.FILES.items(): fh = value if fh.content_type in content_types_present: return make_error_dict( ERROR_CONTENT, 'Only 1 application/zip (or application/x-tar) archive ' 'and 1 atom+xml entry is supported (as per sword2.0 ' 'specification)', 'You provided more than 1 application/(zip|x-tar) ' 'or more than 1 application/atom+xml content-disposition ' 'header in the multipart deposit') content_types_present.add(fh.content_type) data[fh.content_type] = fh if len(content_types_present) != 2: return make_error_dict( ERROR_CONTENT, 'You must provide both 1 application/zip (or ' 'application/x-tar) and 1 atom+xml entry for multipart ' 'deposit', 'You need to provide only 1 application/(zip|x-tar) ' 'and 1 application/atom+xml content-disposition header ' 'in the multipart deposit') filehandler = data['application/zip'] if not filehandler: filehandler = data['application/x-tar'] precondition_status_response = self._check_preconditions_on( filehandler, headers['content-md5sum']) if precondition_status_response: return precondition_status_response try: raw_metadata, metadata = self._read_metadata( data['application/atom+xml']) except ParserError: return make_error_dict( PARSING_ERROR, 'Malformed xml metadata', "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.") # actual storage of data deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) deposit_request_data = { ARCHIVE_KEY: filehandler, METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } self._deposit_request_put( deposit, deposit_request_data, replace_metadata, replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'archive': filehandler.name, 'status': deposit.status, } def _atom_entry(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Atom entry deposit. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id: deposit id associated to the deposit - deposit_date: date of the deposit - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ try: raw_metadata, metadata = self._read_metadata(req.data) except ParserError: return make_error_dict( BAD_REQUEST, 'Malformed xml metadata', "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.") if not metadata: return make_error_dict( BAD_REQUEST, 'Empty body request is not supported', 'Atom entry deposit is supposed to send for metadata. ' 'If the body is empty, there is no metadata.') external_id = metadata.get('external_identifier', headers['slug']) deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) self._deposit_request_put( deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, replace_metadata, replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'archive': None, 'status': deposit.status, } def _empty_post(self, req, headers, collection_name, deposit_id): """Empty post to finalize an empty deposit. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier Returns: Dictionary of result with the deposit's id, the date it was completed and no archive. """ deposit = Deposit.objects.get(pk=deposit_id) deposit.complete_date = timezone.now() deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return { 'deposit_id': deposit_id, 'deposit_date': deposit.complete_date, 'status': deposit.status, 'archive': None, } def _make_iris(self, req, collection_name, deposit_id): """Define the IRI endpoints Args: req (Request): The initial request collection_name (str): client/collection's name deposit_id (id): Deposit identifier Returns: Dictionary of keys with the iris' urls. """ args = [collection_name, deposit_id] return { iri: req.build_absolute_uri(reverse(iri, args=args)) for iri in [EM_IRI, EDIT_SE_IRI, CONT_FILE_IRI, STATE_IRI] } def additional_checks(self, req, headers, collection_name, deposit_id=None): """Permit the child class to enrich additional checks. Returns: dict with 'error' detailing the problem. """ return {} def checks(self, req, collection_name, deposit_id=None): try: self._collection = DepositCollection.objects.get( name=collection_name) except DepositCollection.DoesNotExist: return make_error_dict( NOT_FOUND, 'Unknown collection name %s' % collection_name) username = req.user.username if username: # unauthenticated request can have the username empty try: self._client = DepositClient.objects.get(username=username) except DepositClient.DoesNotExist: return make_error_dict(NOT_FOUND, 'Unknown client name %s' % username) if self._collection.id not in self._client.collections: return make_error_dict( FORBIDDEN, 'Client %s cannot access collection %s' % ( username, collection_name)) if deposit_id: try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'Deposit with id %s does not exist' % deposit_id) checks = self.restrict_access(req, deposit) if checks: return checks headers = self._read_headers(req) if headers['on-behalf-of']: return make_error_dict(MEDIATION_NOT_ALLOWED, 'Mediation is not supported.') checks = self.additional_checks(req, headers, collection_name, deposit_id) if 'error' in checks: return checks return {'headers': headers} def restrict_access(self, req, deposit=None): if deposit: if (req.method != 'GET' and deposit.status != DEPOSIT_STATUS_PARTIAL): summary = "You can only act on deposit with status '%s'" % ( DEPOSIT_STATUS_PARTIAL, ) description = "This deposit has status '%s'" % deposit.status return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description) def _basic_not_allowed_method(self, req, method): return make_error_response( req, METHOD_NOT_ALLOWED, '%s method is not supported on this endpoint' % method) def get(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'GET') def post(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'POST') def put(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'PUT') def delete(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'DELETE') class SWHGetDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support GET method. """ def get(self, req, collection_name, deposit_id, format=None): """Endpoint to create/add resources to deposit. Returns: 200 response when no error during routine occurred 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) r = self.process_get( req, collection_name, deposit_id) if isinstance(r, tuple): status, content, content_type = r return HttpResponse(content, status=status, content_type=content_type) return r @abstractmethod def process_get(self, req, collection_name, deposit_id): """Routine to deal with the deposit's get processing. Returns: Tuple status, stream of content, content-type """ pass class SWHPostDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def post(self, req, collection_name, deposit_id=None, format=None): """Endpoint to create/add resources to deposit. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) headers = checks['headers'] _status, _iri_key, data = self.process_post( req, headers, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) data['packagings'] = ACCEPT_PACKAGINGS iris = self._make_iris(req, collection_name, data['deposit_id']) data.update(iris) response = render(req, 'deposit/deposit_receipt.xml', context=data, content_type='application/xml', status=_status) response._headers['location'] = 'Location', data[_iri_key] return response @abstractmethod def process_post(self, req, headers, collection_name, deposit_id=None): """Routine to deal with the deposit's processing. Returns Tuple of: - response status code (200, 201, etc...) - key iri (EM_IRI, EDIT_SE_IRI, etc...) - dictionary of the processing result """ pass class SWHPutDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support PUT method. """ def put(self, req, collection_name, deposit_id, format=None): """Endpoint to update deposit resources. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) headers = checks['headers'] data = self.process_put(req, headers, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_put(self, req, headers, collection_name, deposit_id): """Routine to deal with updating a deposit in some way. Returns dictionary of the processing result """ pass class SWHDeleteDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def delete(self, req, collection_name, deposit_id): """Endpoint to delete some deposit's resources (archives, deposit). Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) data = self.process_delete(req, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_delete(self, req, collection_name, deposit_id): """Routine to delete a resource. This is mostly not allowed except for the EM_IRI (cf. .api.deposit_update.SWHUpdateArchiveDeposit) """ pass diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py index 986a5351..d257572c 100644 --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -1,51 +1,92 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.deposit import utils from ...config import METADATA_TYPE from ...models import DepositRequest, Deposit +from rest_framework.permissions import AllowAny + +from swh.deposit.api.common import SWHAPIView +from swh.deposit.errors import make_error_dict, NOT_FOUND + class DepositReadMixin: """Deposit Read mixin """ def _deposit_requests(self, deposit, request_type): """Given a deposit, yields its associated deposit_request Args: deposit (Deposit): Deposit to list requests for request_type (str): 'archive' or 'metadata' Yields: deposit requests of type request_type associated to the deposit """ if isinstance(deposit, int): deposit = Deposit.objects.get(pk=deposit) deposit_requests = DepositRequest.objects.filter( type=request_type, deposit=deposit).order_by('id') for deposit_request in deposit_requests: yield deposit_request def _metadata_get(self, deposit): """Given a deposit, aggregate all metadata requests. Args: deposit (Deposit): The deposit instance to extract metadata from. Returns: metadata dict from the deposit. """ metadata = (m.metadata for m in self._deposit_requests( deposit, request_type=METADATA_TYPE)) return utils.merge(*metadata) + + +class SWHPrivateAPIView(SWHAPIView): + """Mixin intended as private api (so no authentication) based API view + (for the private ones). + + """ + authentication_classes = () + permission_classes = (AllowAny, ) + + def checks(self, req, collection_name, deposit_id=None): + """Override default checks implementation to allow empty collection. + + """ + if deposit_id: + try: + Deposit.objects.get(pk=deposit_id) + except Deposit.DoesNotExist: + return make_error_dict( + NOT_FOUND, + 'Deposit with id %s does not exist' % + deposit_id) + + headers = self._read_headers(req) + checks = self.additional_checks( + req, headers, collection_name, deposit_id) + if 'error' in checks: + return checks + + return {'headers': headers} + + def get(self, req, collection_name=None, deposit_id=None, format=None): + return super().get(req, collection_name, deposit_id, format) + + def put(self, req, collection_name=None, deposit_id=None, format=None): + return super().put(req, collection_name, deposit_id, format) diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py index 4b170aad..8961d914 100644 --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -1,209 +1,209 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import re import tarfile import zipfile from rest_framework import status -from . import DepositReadMixin -from ..common import SWHGetDepositAPI, SWHPrivateAPIView +from . import DepositReadMixin, SWHPrivateAPIView +from ..common import SWHGetDepositAPI from ...config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED from ...config import ARCHIVE_TYPE from ...models import Deposit MANDATORY_FIELDS_MISSING = 'Mandatory fields are missing' ALTERNATE_FIELDS_MISSING = 'Mandatory alternate fields are missing' MANDATORY_ARCHIVE_UNREADABLE = 'At least one of its associated archives is not readable' # noqa MANDATORY_ARCHIVE_INVALID = 'Mandatory archive is invalid (i.e contains only one archive)' # noqa MANDATORY_ARCHIVE_UNSUPPORTED = 'Mandatory archive type is not supported' MANDATORY_ARCHIVE_MISSING = 'Deposit without archive is rejected' ARCHIVE_EXTENSIONS = [ 'zip', 'tar', 'tar.gz', 'xz', 'tar.xz', 'bz2', 'tar.bz2', 'Z', 'tar.Z', 'tgz', '7z' ] PATTERN_ARCHIVE_EXTENSION = re.compile( r'.*\.(%s)$' % '|'.join(ARCHIVE_EXTENSIONS)) -class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView, DepositReadMixin): +class SWHChecksDeposit(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ def _check_deposit_archives(self, deposit): """Given a deposit, check each deposit request of type archive. Args: The deposit to check archives for Returns tuple (status, error_detail): True, None if all archives are ok, (False, ) otherwise. """ requests = list(self._deposit_requests( deposit, request_type=ARCHIVE_TYPE)) if len(requests) == 0: # no associated archive is refused return False, { 'archive': [{ 'summary': MANDATORY_ARCHIVE_MISSING, }] } errors = [] for archive_request in requests: check, error_message = self._check_archive(archive_request) if not check: errors.append({ 'summary': error_message, 'fields': [archive_request.id] }) if not errors: return True, None return False, { 'archive': errors } def _check_archive(self, archive_request): """Check that a deposit associated archive is ok: - readable - supported archive format - valid content: the archive does not contain a single archive file If any of those checks are not ok, return the corresponding failing check. Args: archive_path (DepositRequest): Archive to check Returns: (True, None) if archive is check compliant, (False, ) otherwise. """ archive_path = archive_request.archive.path try: if zipfile.is_zipfile(archive_path): with zipfile.ZipFile(archive_path) as f: files = f.namelist() elif tarfile.is_tarfile(archive_path): with tarfile.open(archive_path) as f: files = f.getnames() else: return False, MANDATORY_ARCHIVE_UNSUPPORTED except Exception: return False, MANDATORY_ARCHIVE_UNREADABLE if len(files) > 1: return True, None element = files[0] if PATTERN_ARCHIVE_EXTENSION.match(element): # archive in archive! return False, MANDATORY_ARCHIVE_INVALID return True, None def _check_metadata(self, metadata): """Check to execute on all metadata for mandatory field presence. Args: metadata (dict): Metadata dictionary to check for mandatory fields Returns: tuple (status, error_detail): True, None if metadata are ok (False, ) otherwise. """ required_fields = { 'author': False, } alternate_fields = { ('name', 'title'): False, # alternate field, at least one # of them must be present } for field, value in metadata.items(): for name in required_fields: if name in field: required_fields[name] = True for possible_names in alternate_fields: for possible_name in possible_names: if possible_name in field: alternate_fields[possible_names] = True continue mandatory_result = [k for k, v in required_fields.items() if not v] optional_result = [ ' or '.join(k) for k, v in alternate_fields.items() if not v] if mandatory_result == [] and optional_result == []: return True, None detail = [] if mandatory_result != []: detail.append({ 'summary': MANDATORY_FIELDS_MISSING, 'fields': mandatory_result }) if optional_result != []: detail.append({ 'summary': ALTERNATE_FIELDS_MISSING, 'fields': optional_result, }) return False, { 'metadata': detail } def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ deposit = Deposit.objects.get(pk=deposit_id) metadata = self._metadata_get(deposit) problems = {} # will check each deposit's associated request (both of type # archive and metadata) for errors archives_status, error_detail = self._check_deposit_archives(deposit) if not archives_status: problems.update(error_detail) metadata_status, error_detail = self._check_metadata(metadata) if not metadata_status: problems.update(error_detail) deposit_status = archives_status and metadata_status # if any problems arose, the deposit is rejected if not deposit_status: deposit.status = DEPOSIT_STATUS_REJECTED deposit.status_detail = problems response = { 'status': deposit.status, 'details': deposit.status_detail, } else: deposit.status = DEPOSIT_STATUS_VERIFIED response = { 'status': deposit.status, } deposit.save() return status.HTTP_200_OK, json.dumps(response), 'application/json' diff --git a/swh/deposit/api/private/deposit_list.py b/swh/deposit/api/private/deposit_list.py index a03d5a1a..f3e3b1ad 100644 --- a/swh/deposit/api/private/deposit_list.py +++ b/swh/deposit/api/private/deposit_list.py @@ -1,48 +1,48 @@ -# Copyright (C) 2018 The Software Heritage developers +# Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework.fields import _UnvalidatedField from rest_framework.generics import ListAPIView from rest_framework.pagination import PageNumberPagination from rest_framework import serializers -from ..common import SWHPrivateAPIView +from . import SWHPrivateAPIView from ..converters import convert_status_detail from ...models import Deposit class DefaultPagination(PageNumberPagination): page_size = 100 page_size_query_param = 'page_size' class StatusDetailField(_UnvalidatedField): """status_detail field is a dict, we want a simple message instead. So, we reuse the convert_status_detail from deposit_status endpoint to that effect. """ def to_representation(self, value): return convert_status_detail(value) class DepositSerializer(serializers.ModelSerializer): status_detail = StatusDetailField() class Meta: model = Deposit fields = '__all__' class DepositList(ListAPIView, SWHPrivateAPIView): """Deposit request class to list the deposit's status per page. HTTP verbs supported: GET """ queryset = Deposit.objects.all().order_by('id') serializer_class = DepositSerializer pagination_class = DefaultPagination diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py index 1df08f36..8b834b04 100644 --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -1,234 +1,234 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import os import shutil import tempfile from contextlib import contextmanager from django.http import FileResponse from rest_framework import status from swh.core import tarball from swh.model import identifiers from swh.deposit.utils import normalize_date from swh.deposit import utils -from . import DepositReadMixin +from . import DepositReadMixin, SWHPrivateAPIView from ...config import SWH_PERSON, ARCHIVE_TYPE -from ..common import SWHGetDepositAPI, SWHPrivateAPIView +from ..common import SWHGetDepositAPI from ...models import Deposit @contextmanager def aggregate_tarballs(extraction_dir, archive_paths): """Aggregate multiple tarballs into one and returns this new archive's path. Args: extraction_dir (path): Path to use for the tarballs computation archive_paths ([str]): Deposit's archive paths Returns: Tuple (directory to clean up, archive path (aggregated or not)) """ if len(archive_paths) > 1: # need to rebuild one archive from multiple ones os.makedirs(extraction_dir, 0o755, exist_ok=True) dir_path = tempfile.mkdtemp(prefix='swh.deposit-', dir=extraction_dir) # root folder to build an aggregated tarball aggregated_tarball_rootdir = os.path.join(dir_path, 'aggregate') os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True) # uncompress in a temporary location all archives for archive_path in archive_paths: tarball.uncompress(archive_path, aggregated_tarball_rootdir) # Aggregate into one big tarball the multiple smaller ones temp_tarpath = tarball.compress( aggregated_tarball_rootdir + '.zip', nature='zip', dirpath_or_files=aggregated_tarball_rootdir) # can already clean up temporary directory shutil.rmtree(aggregated_tarball_rootdir) try: yield temp_tarpath finally: shutil.rmtree(dir_path) else: # only 1 archive, no need to do fancy actions (and no cleanup step) yield archive_paths[0] -class SWHDepositReadArchives(SWHGetDepositAPI, SWHPrivateAPIView, +class SWHDepositReadArchives(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ ADDITIONAL_CONFIG = { 'extraction_dir': ('str', '/tmp/swh-deposit/archive/'), } def __init__(self): super().__init__() self.extraction_dir = self.config['extraction_dir'] if not os.path.exists(self.extraction_dir): os.makedirs(self.extraction_dir) def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ archive_paths = [r.archive.path for r in self._deposit_requests( deposit_id, request_type=ARCHIVE_TYPE)] with aggregate_tarballs(self.extraction_dir, archive_paths) as path: return FileResponse(open(path, 'rb'), status=status.HTTP_200_OK, content_type='application/octet-stream') -class SWHDepositReadMetadata(SWHGetDepositAPI, SWHPrivateAPIView, +class SWHDepositReadMetadata(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): """Class in charge of aggregating metadata on a deposit. """ ADDITIONAL_CONFIG = { 'provider': ('dict', { # 'provider_name': '', # those are not set since read from the # 'provider_url': '', # deposit's client 'provider_type': 'deposit_client', 'metadata': {} }), 'tool': ('dict', { 'name': 'swh-deposit', 'version': '0.0.1', 'configuration': { 'sword_version': '2' } }) } def __init__(self): super().__init__() self.provider = self.config['provider'] self.tool = self.config['tool'] def _normalize_dates(self, deposit, metadata): """Normalize the date to use as a tuple of author date, committer date from the incoming metadata. Args: deposit (Deposit): Deposit model representation metadata (Dict): Metadata dict representation Returns: Tuple of author date, committer date. Those dates are swh normalized. """ commit_date = metadata.get('codemeta:datePublished') author_date = metadata.get('codemeta:dateCreated') if author_date and commit_date: pass elif commit_date: author_date = commit_date elif author_date: commit_date = author_date else: author_date = deposit.complete_date commit_date = deposit.complete_date return ( normalize_date(author_date), normalize_date(commit_date) ) def metadata_read(self, deposit): """Read and aggregate multiple data on deposit into one unified data dictionary. Args: deposit (Deposit): Deposit concerned by the data aggregation. Returns: Dictionary of data representing the deposit to inject in swh. """ metadata = self._metadata_get(deposit) # Read information metadata data = { 'origin': { 'type': 'deposit', 'url': utils.origin_url_from(deposit), } } # revision fullname = deposit.client.username author_committer = SWH_PERSON # metadata provider self.provider['provider_name'] = deposit.client.last_name self.provider['provider_url'] = deposit.client.provider_url revision_type = 'tar' revision_msg = '%s: Deposit %s in collection %s' % ( fullname, deposit.id, deposit.collection.name) author_date, commit_date = self._normalize_dates(deposit, metadata) data['revision'] = { 'synthetic': True, 'date': author_date, 'committer_date': commit_date, 'author': author_committer, 'committer': author_committer, 'type': revision_type, 'message': revision_msg, 'metadata': metadata, } if deposit.parent: swh_persistent_id = deposit.parent.swh_id persistent_identifier = identifiers.parse_persistent_identifier( swh_persistent_id) parent_revision = persistent_identifier.object_id data['revision']['parents'] = [parent_revision] data['branch_name'] = 'master' data['origin_metadata'] = { 'provider': self.provider, 'tool': self.tool, 'metadata': metadata } return data def process_get(self, req, collection_name, deposit_id): deposit = Deposit.objects.get(pk=deposit_id) data = self.metadata_read(deposit) d = {} if data: d = json.dumps(data) return status.HTTP_200_OK, d, 'application/json' diff --git a/swh/deposit/api/private/deposit_update_status.py b/swh/deposit/api/private/deposit_update_status.py index 8b907a4c..208a49eb 100644 --- a/swh/deposit/api/private/deposit_update_status.py +++ b/swh/deposit/api/private/deposit_update_status.py @@ -1,89 +1,83 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework.parsers import JSONParser from swh.model.identifiers import ( persistent_identifier, REVISION, DIRECTORY ) -from ..common import SWHPutDepositAPI, SWHPrivateAPIView +from . import SWHPrivateAPIView +from ..common import SWHPutDepositAPI from ...errors import make_error_dict, BAD_REQUEST from ...models import Deposit, DEPOSIT_STATUS_DETAIL from ...models import DEPOSIT_STATUS_LOAD_SUCCESS -class SWHUpdateStatusDeposit(SWHPutDepositAPI, SWHPrivateAPIView): +class SWHUpdateStatusDeposit(SWHPrivateAPIView, SWHPutDepositAPI): """Deposit request class to update the deposit's status. HTTP verbs supported: PUT """ parser_classes = (JSONParser, ) def additional_checks(self, req, headers, collection_name, deposit_id=None): """Enrich existing checks to the default ones. New checks: - Ensure the status is provided - Ensure it exists """ data = req.data status = data.get('status') if not status: msg = 'The status key is mandatory with possible values %s' % list( DEPOSIT_STATUS_DETAIL.keys()) return make_error_dict(BAD_REQUEST, msg) if status not in DEPOSIT_STATUS_DETAIL: msg = 'Possible status in %s' % list(DEPOSIT_STATUS_DETAIL.keys()) return make_error_dict(BAD_REQUEST, msg) if status == DEPOSIT_STATUS_LOAD_SUCCESS: swh_id = data.get('revision_id') if not swh_id: msg = 'Updating status to %s requires a revision_id key' % ( status, ) return make_error_dict(BAD_REQUEST, msg) return {} - def restrict_access(self, req, deposit=None): - """Remove restriction modification to 'partial' deposit. - Update is possible regardless of the existing status. - - """ - return None - def process_put(self, req, headers, collection_name, deposit_id): """Update the deposit's status Returns: 204 No content """ deposit = Deposit.objects.get(pk=deposit_id) deposit.status = req.data['status'] # checks already done before origin_url = req.data.get('origin_url') dir_id = req.data.get('directory_id') if dir_id: deposit.swh_id = persistent_identifier(DIRECTORY, dir_id) deposit.swh_id_context = persistent_identifier( DIRECTORY, dir_id, metadata={'origin': origin_url}) rev_id = req.data.get('revision_id') if rev_id: deposit.swh_anchor_id = persistent_identifier( REVISION, rev_id) deposit.swh_anchor_id_context = persistent_identifier( REVISION, rev_id, metadata={'origin': origin_url}) deposit.save() return {} diff --git a/swh/deposit/api/private/urls.py b/swh/deposit/api/private/urls.py index 14335f25..f3a0363c 100644 --- a/swh/deposit/api/private/urls.py +++ b/swh/deposit/api/private/urls.py @@ -1,41 +1,62 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.conf.urls import url from ...config import ( PRIVATE_GET_RAW_CONTENT, PRIVATE_PUT_DEPOSIT, PRIVATE_GET_DEPOSIT_METADATA, PRIVATE_CHECK_DEPOSIT, PRIVATE_LIST_DEPOSITS ) from .deposit_read import SWHDepositReadArchives from .deposit_read import SWHDepositReadMetadata from .deposit_update_status import SWHUpdateStatusDeposit from .deposit_check import SWHChecksDeposit from .deposit_list import DepositList urlpatterns = [ # Retrieve deposit's raw archives' content # -> GET url(r'^(?P[^/]+)/(?P[^/]+)/raw/$', SWHDepositReadArchives.as_view(), name=PRIVATE_GET_RAW_CONTENT), # Update deposit's status # -> PUT url(r'^(?P[^/]+)/(?P[^/]+)/update/$', SWHUpdateStatusDeposit.as_view(), name=PRIVATE_PUT_DEPOSIT), # Retrieve metadata information on a specific deposit # -> GET url(r'^(?P[^/]+)/(?P[^/]+)/meta/$', SWHDepositReadMetadata.as_view(), name=PRIVATE_GET_DEPOSIT_METADATA), # Check archive and metadata information on a specific deposit # -> GET url(r'^(?P[^/]+)/(?P[^/]+)/check/$', SWHChecksDeposit.as_view(), name=PRIVATE_CHECK_DEPOSIT), + # Retrieve deposit's raw archives' content + # -> GET + url(r'^(?P[^/]+)/raw/$', + SWHDepositReadArchives.as_view(), + name=PRIVATE_GET_RAW_CONTENT+'-nc'), + # Update deposit's status + # -> PUT + url(r'^(?P[^/]+)/update/$', + SWHUpdateStatusDeposit.as_view(), + name=PRIVATE_PUT_DEPOSIT+'-nc'), + # Retrieve metadata information on a specific deposit + # -> GET + url(r'^(?P[^/]+)/meta/$', + SWHDepositReadMetadata.as_view(), + name=PRIVATE_GET_DEPOSIT_METADATA+'-nc'), + # Check archive and metadata information on a specific deposit + # -> GET + url(r'^(?P[^/]+)/check/$', + SWHChecksDeposit.as_view(), + name=PRIVATE_CHECK_DEPOSIT+'-nc'), + url(r'^deposits/$', DepositList.as_view(), name=PRIVATE_LIST_DEPOSITS) ] diff --git a/swh/deposit/cli/admin.py b/swh/deposit/cli/admin.py index 364ee32e..b59268cb 100644 --- a/swh/deposit/cli/admin.py +++ b/swh/deposit/cli/admin.py @@ -1,254 +1,254 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click from swh.deposit.config import setup_django_for from swh.deposit.cli import deposit @deposit.group('admin') @click.option('--config-file', '-C', default=None, type=click.Path(exists=True, dir_okay=False,), help="Optional extra configuration file.") @click.option('--platform', default='development', type=click.Choice(['development', 'production']), help='development or production platform') @click.pass_context def admin(ctx, config_file, platform): """Server administration tasks (manipulate user or collections)""" # configuration happens here setup_django_for(platform, config_file=config_file) @admin.group('user') @click.pass_context def user(ctx): """Manipulate user.""" # configuration happens here pass def _create_collection(name): """Create the collection with name if it does not exist. Args: name (str): collection's name Returns: collection (DepositCollection): the existing collection object (created or not) """ # to avoid loading too early django namespaces from swh.deposit.models import DepositCollection try: collection = DepositCollection.objects.get(name=name) click.echo('Collection %s exists, nothing to do.' % name) except DepositCollection.DoesNotExist: click.echo('Create new collection %s' % name) collection = DepositCollection.objects.create(name=name) click.echo('Collection %s created' % name) return collection @user.command('create') @click.option('--username', required=True, help="User's name") @click.option('--password', required=True, help="Desired user's password (plain).") @click.option('--firstname', default='', help="User's first name") @click.option('--lastname', default='', help="User's last name") @click.option('--email', default='', help="User's email") @click.option('--collection', help="User's collection") @click.option('--provider-url', default='', help="Provider URL") @click.option('--domain', default='', help="The domain") @click.pass_context def user_create(ctx, username, password, firstname, lastname, email, collection, provider_url, domain): """Create a user with some needed information (password, collection) If the collection does not exist, the collection is then created alongside. - The password is stored encrypted using django's utilies. + The password is stored encrypted using django's utilities. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositClient # If collection is not provided, fallback to username if not collection: collection = username click.echo('collection: %s' % collection) # create the collection if it does not exist collection = _create_collection(collection) # user create/update try: user = DepositClient.objects.get(username=username) click.echo('User %s exists, updating information.' % user) user.set_password(password) except DepositClient.DoesNotExist: click.echo('Create new user %s' % username) user = DepositClient.objects.create_user( username=username, password=password) user.collections = [collection.id] user.first_name = firstname user.last_name = lastname user.email = email user.is_active = True user.provider_url = provider_url user.domain = domain user.save() click.echo('Information registered for user %s' % user) @user.command('list') @click.pass_context def user_list(ctx): """List existing users. This entrypoint is not paginated yet as there is not a lot of entry. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositClient users = DepositClient.objects.all() if not users: output = 'Empty user list' else: output = '\n'.join((user.username for user in users)) click.echo(output) @user.command('exists') @click.argument('username', required=True) @click.pass_context def user_exists(ctx, username): """Check if user exists. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositClient try: DepositClient.objects.get(username=username) click.echo('User %s exists.' % username) ctx.exit(0) except DepositClient.DoesNotExist: click.echo('User %s does not exist.' % username) ctx.exit(1) @admin.group('collection') @click.pass_context def collection(ctx): """Manipulate collections.""" pass @collection.command('create') @click.option('--name', required=True, help="Collection's name") @click.pass_context def collection_create(ctx, name): _create_collection(name) @collection.command('list') @click.pass_context def collection_list(ctx): """List existing collections. This entrypoint is not paginated yet as there is not a lot of entry. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositCollection collections = DepositCollection.objects.all() if not collections: output = 'Empty collection list' else: output = '\n'.join((col.name for col in collections)) click.echo(output) @admin.group('deposit') @click.pass_context -def deposit(ctx): +def adm_deposit(ctx): """Manipulate deposit.""" pass -@deposit.command('reschedule') +@adm_deposit.command('reschedule') @click.option('--deposit-id', required=True, help="Deposit identifier") @click.pass_context -def deposit_reschedule(ctx, deposit_id): +def adm_deposit_reschedule(ctx, deposit_id): """Reschedule the deposit loading This will: - check the deposit's status to something reasonable (failed or done). That means that the checks have passed alright but something went wrong during the loading (failed: loading failed, done: loading ok, still for some reasons as in bugs, we need to reschedule it) - reset the deposit's status to 'verified' (prior to any loading but after the checks which are fine) and removes the different archives' identifiers (swh-id, ...) - trigger back the loading task through the scheduler """ # to avoid loading too early django namespaces from datetime import datetime from swh.deposit.models import Deposit from swh.deposit.config import ( DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_VERIFIED, SWHDefaultConfig, ) try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: click.echo('Deposit %s does not exist.' % deposit_id) ctx.exit(1) # Check the deposit is in a reasonable state accepted_statuses = [ DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE ] if deposit.status == DEPOSIT_STATUS_VERIFIED: click.echo('Deposit %s\'s status already set for rescheduling.' % ( deposit_id)) ctx.exit(0) if deposit.status not in accepted_statuses: click.echo('Deposit %s\'s status be one of %s.' % ( deposit_id, ', '.join(accepted_statuses))) ctx.exit(1) task_id = deposit.load_task_id if not task_id: click.echo('Deposit %s cannot be rescheduled. It misses the ' 'associated task.' % deposit_id) ctx.exit(1) # Reset the deposit's state deposit.swh_id = None deposit.swh_id_context = None deposit.swh_anchor_id = None deposit.swh_anchor_id_context = None deposit.status = DEPOSIT_STATUS_VERIFIED deposit.save() # Trigger back the deposit scheduler = SWHDefaultConfig().scheduler scheduler.set_status_tasks( [task_id], status='next_run_not_scheduled', next_run=datetime.now()) diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py index 130b7f7b..2962f636 100644 --- a/swh/deposit/cli/client.py +++ b/swh/deposit/cli/client.py @@ -1,394 +1,394 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import logging import tempfile import uuid import click import xmltodict from swh.deposit.client import PublicApiDepositClient from swh.deposit.cli import deposit logger = logging.getLogger(__name__) class InputError(ValueError): """Input script error """ pass def generate_slug(): """Generate a slug (sample purposes). """ return str(uuid.uuid4()) def _url(url): """Force the /1 api version at the end of the url (avoiding confusing issues without it). Args: url (str): api url used by cli users Returns: Top level api url to actually request """ if not url.endswith('/1'): url = '%s/1' % url return url def generate_metadata_file(name, external_id, authors): """Generate a temporary metadata file with the minimum required metadata This generates a xml file in a temporary location and returns the path to that file. This is up to the client of that function to clean up the temporary file. Args: name (str): Software's name external_id (str): External identifier (slug) or generated one authors (List[str]): List of author names Returns: Filepath to the metadata generated file """ _, tmpfile = tempfile.mkstemp(prefix='swh.deposit.cli.') # generate a metadata file with the minimum required metadata codemetadata = { 'entry': { '@xmlns': "http://www.w3.org/2005/Atom", '@xmlns:codemeta': "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0", 'codemeta:name': name, 'codemeta:identifier': external_id, 'codemeta:author': [{ 'codemeta:name': author_name } for author_name in authors], }, } logging.debug('Temporary file: %s', tmpfile) logging.debug('Metadata dict to generate as xml: %s', codemetadata) s = xmltodict.unparse(codemetadata, pretty=True) logging.debug('Metadata dict as xml generated: %s', s) with open(tmpfile, 'w') as fp: fp.write(s) return tmpfile def _cleanup_tempfile(config): """Clean up the temporary metadata file generated. Args: config (Dict): A configuration dict with 2 important keys for that routine, 'cleanup_tempfile' (bool) and 'metadata' (path to eventually clean up) """ if config['cleanup_tempfile']: path = config['metadata'] if os.path.exists(path): os.unlink(path) def _client(url, username, password): """Instantiate a client to access the deposit api server Args: url (str): Deposit api server username (str): User password (str): User's password """ client = PublicApiDepositClient({ 'url': url, 'auth': { 'username': username, 'password': password }, }) return client def _collection(client): """Retrieve the client's collection """ # retrieve user's collection sd_content = client.service_document() if 'error' in sd_content: raise InputError('Service document retrieval: %s' % ( sd_content['error'], )) collection = sd_content[ 'service']['workspace']['collection']['sword:name'] return collection def client_command_parse_input( username, password, archive, metadata, archive_deposit, metadata_deposit, collection, slug, partial, deposit_id, replace, url, name, authors): """Parse the client subcommand options and make sure the combination is acceptable*. If not, an InputError exception is raised explaining the issue. By acceptable, we mean: - A multipart deposit (create or update) requires: - an existing software archive - an existing metadata file or author(s) and name provided in params - A binary deposit (create/update) requires an existing software archive - A metadata deposit (create/update) requires an existing metadata file or author(s) and name provided in params - A deposit update requires a deposit_id This will not prevent all failure cases though. The remaining errors are already dealt with by the underlying api client. Raises: InputError explaining the issue Returns: dict with the following keys: 'archive': the software archive to deposit 'username': username 'password': associated password 'metadata': the metadata file to deposit 'collection': the username's associated client 'slug': the slug or external id identifying the deposit to make 'partial': if the deposit is partial or not 'client': instantiated class 'url': deposit's server main entry point 'deposit_type': deposit's type (binary, multipart, metadata) 'deposit_id': optional deposit identifier """ cleanup_tempfile = False try: if archive_deposit and metadata_deposit: # too many flags use, remove redundant ones (-> multipart deposit) archive_deposit = False metadata_deposit = False if not slug: # generate one as this is mandatory slug = generate_slug() if not metadata and name and authors: metadata = generate_metadata_file(name, slug, authors) cleanup_tempfile = True if metadata_deposit: archive = None if archive_deposit: metadata = None if metadata_deposit and not metadata: raise InputError( "Metadata deposit must be provided for metadata " "deposit (either a filepath or --name and --author)") if not archive and not metadata: raise InputError( 'Please provide an actionable command. See --help for more ' 'information') if replace and not deposit_id: raise InputError( 'To update an existing deposit, you must provide its id') client = _client(url, username, password) if not collection: collection = _collection(client) return { 'archive': archive, 'username': username, 'password': password, 'metadata': metadata, 'cleanup_tempfile': cleanup_tempfile, 'collection': collection, 'slug': slug, 'in_progress': partial, 'client': client, 'url': url, 'deposit_id': deposit_id, 'replace': replace, } except Exception: # to be clean, cleanup prior to raise _cleanup_tempfile({ 'cleanup_tempfile': cleanup_tempfile, 'metadata': metadata }) raise def _subdict(d, keys): 'return a dict from d with only given keys' return {k: v for k, v in d.items() if k in keys} def deposit_create(config, logger): """Delegate the actual deposit to the deposit client. """ logger.debug('Create deposit') client = config['client'] keys = ('collection', 'archive', 'metadata', 'slug', 'in_progress') return client.deposit_create( **_subdict(config, keys)) def deposit_update(config, logger): """Delegate the actual deposit to the deposit client. """ logger.debug('Update deposit') client = config['client'] keys = ('collection', 'deposit_id', 'archive', 'metadata', 'slug', 'in_progress', 'replace') return client.deposit_update( **_subdict(config, keys)) @deposit.command() -@click.option('--username', required=1, +@click.option('--username', required=True, help="(Mandatory) User's name") -@click.option('--password', required=1, +@click.option('--password', required=True, help="(Mandatory) User's associated password") @click.option('--archive', type=click.Path(exists=True), help='(Optional) Software archive to deposit') @click.option('--metadata', type=click.Path(exists=True), help="(Optional) Path to xml metadata file. If not provided, this will use a file named .metadata.xml") # noqa @click.option('--archive-deposit/--no-archive-deposit', default=False, help='(Optional) Software archive only deposit') @click.option('--metadata-deposit/--no-metadata-deposit', default=False, help='(Optional) Metadata only deposit') @click.option('--collection', help="(Optional) User's collection. If not provided, this will be fetched.") # noqa @click.option('--slug', help="""(Optional) External system information identifier. If not provided, it will be generated""") # noqa @click.option('--partial/--no-partial', default=False, help='(Optional) The deposit will be partial, other deposits will have to take place to finalize it.') # noqa @click.option('--deposit-id', default=None, help='(Optional) Update an existing partial deposit with its identifier') # noqa @click.option('--replace/--no-replace', default=False, help='(Optional) Update by replacing existing metadata to a deposit') # noqa @click.option('--url', default='https://deposit.softwareheritage.org', help="(Optional) Deposit server api endpoint. By default, https://deposit.softwareheritage.org/1") # noqa @click.option('--verbose/--no-verbose', default=False, help='Verbose mode') @click.option('--name', help='Software name') @click.option('--author', multiple=True, help='Software author(s), this can be repeated as many times' ' as there are authors') @click.pass_context def upload(ctx, username, password, archive=None, metadata=None, archive_deposit=False, metadata_deposit=False, collection=None, slug=None, partial=False, deposit_id=None, replace=False, url='https://deposit.softwareheritage.org', verbose=False, name=None, author=None): """Software Heritage Public Deposit Client Create/Update deposit through the command line. More documentation can be found at https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html. """ url = _url(url) config = {} try: logger.debug('Parsing cli options') config = client_command_parse_input( username, password, archive, metadata, archive_deposit, metadata_deposit, collection, slug, partial, deposit_id, replace, url, name, author) except InputError as e: msg = 'Problem during parsing options: %s' % e r = { 'error': msg, } logger.info(r) return 1 try: if verbose: logger.info("Parsed configuration: %s" % ( config, )) deposit_id = config['deposit_id'] if deposit_id: r = deposit_update(config, logger) else: r = deposit_create(config, logger) logger.info(r) finally: _cleanup_tempfile(config) @deposit.command() @click.option('--url', default='https://deposit.softwareheritage.org', help="(Optional) Deposit server api endpoint. By default, " "https://deposit.softwareheritage.org/1") -@click.option('--username', required=1, +@click.option('--username', required=True, help="(Mandatory) User's name") -@click.option('--password', required=1, +@click.option('--password', required=True, help="(Mandatory) User's associated password") @click.option('--deposit-id', default=None, - required=1, + required=True, help="Deposit identifier.") @click.pass_context def status(ctx, url, username, password, deposit_id): """Deposit's status """ url = _url(url) logger.debug('Status deposit') try: client = _client(url, username, password) collection = _collection(client) except InputError as e: msg = 'Problem during parsing options: %s' % e r = { 'error': msg, } logger.info(r) return 1 r = client.deposit_status( collection=collection, deposit_id=deposit_id) logger.info(r) diff --git a/swh/deposit/loader/__init__.py b/swh/deposit/loader/__init__.py index e69de29b..6e1af4af 100644 --- a/swh/deposit/loader/__init__.py +++ b/swh/deposit/loader/__init__.py @@ -0,0 +1,7 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +def register(): + return {'task_modules': ['%s.tasks' % __name__]} diff --git a/swh/deposit/migrations/0017_auto_20190925_0906.py b/swh/deposit/migrations/0017_auto_20190925_0906.py new file mode 100644 index 00000000..15235bf8 --- /dev/null +++ b/swh/deposit/migrations/0017_auto_20190925_0906.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.23 on 2019-09-25 09:06 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('deposit', '0016_auto_20190507_1408'), + ] + + operations = [ + migrations.AlterField( + model_name='deposit', + name='parent', + field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.PROTECT, to='deposit.Deposit'), + ), + ] diff --git a/swh/deposit/models.py b/swh/deposit/models.py index 5f188341..3ff3bdd0 100644 --- a/swh/deposit/models.py +++ b/swh/deposit/models.py @@ -1,220 +1,220 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # Generated from: # cd swh_deposit && \ # python3 -m manage inspectdb from django.contrib.postgres.fields import JSONField, ArrayField from django.contrib.auth.models import User, UserManager from django.db import models from django.utils.timezone import now from .config import ( DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_REJECTED, ARCHIVE_TYPE, METADATA_TYPE ) class Dbversion(models.Model): """Db version """ version = models.IntegerField(primary_key=True) release = models.DateTimeField(default=now, null=True) description = models.TextField(blank=True, null=True) class Meta: db_table = 'dbversion' def __str__(self): return str({ 'version': self.version, 'release': self.release, 'description': self.description }) """Possible status""" DEPOSIT_STATUS = [ (DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_PARTIAL), ('expired', 'expired'), (DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_DEPOSITED), (DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_VERIFIED), (DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_REJECTED), ('loading', 'loading'), (DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_SUCCESS), (DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_LOAD_FAILURE), ] """Possible status and the detailed meaning.""" DEPOSIT_STATUS_DETAIL = { DEPOSIT_STATUS_PARTIAL: 'Deposit is partially received. To finalize it, ' 'In-Progress header should be false', 'expired': 'Deposit has been there too long and is now ' 'deemed ready to be garbage collected', DEPOSIT_STATUS_DEPOSITED: 'Deposit is ready for additional checks ' '(tarball ok, metadata, etc...)', DEPOSIT_STATUS_VERIFIED: 'Deposit is fully received, checked, and ' 'ready for loading', DEPOSIT_STATUS_REJECTED: 'Deposit failed the checks', 'loading': "Loading is ongoing on swh's side", DEPOSIT_STATUS_LOAD_SUCCESS: 'The deposit has been successfully ' 'loaded into the Software Heritage archive', DEPOSIT_STATUS_LOAD_FAILURE: 'The deposit loading into the ' 'Software Heritage archive failed', } class DepositClient(User): """Deposit client """ collections = ArrayField(models.IntegerField(), null=True) objects = UserManager() provider_url = models.TextField(null=False) domain = models.TextField(null=False) class Meta: db_table = 'deposit_client' def __str__(self): return str({ 'id': self.id, 'collections': self.collections, 'username': super().username, 'domain': self.domain, 'provider_url': self.provider_url, }) class Deposit(models.Model): """Deposit reception table """ id = models.BigAutoField(primary_key=True) # First deposit reception date reception_date = models.DateTimeField(auto_now_add=True) # Date when the deposit is deemed complete and ready for loading complete_date = models.DateTimeField(null=True) # collection concerned by the deposit collection = models.ForeignKey( 'DepositCollection', models.DO_NOTHING) # Deposit's external identifier external_id = models.TextField() # Deposit client client = models.ForeignKey('DepositClient', models.DO_NOTHING) # SWH's loading result identifier swh_id = models.TextField(blank=True, null=True) swh_id_context = models.TextField(blank=True, null=True) swh_anchor_id = models.TextField(blank=True, null=True) swh_anchor_id_context = models.TextField(blank=True, null=True) # Deposit's status regarding loading status = models.TextField( choices=DEPOSIT_STATUS, default=DEPOSIT_STATUS_PARTIAL) status_detail = JSONField(null=True) # deposit can have one parent - parent = models.ForeignKey('self', null=True) + parent = models.ForeignKey('self', on_delete=models.PROTECT, null=True) check_task_id = models.TextField( blank=True, null=True, verbose_name="Scheduler's associated checking task id" ) load_task_id = models.TextField( blank=True, null=True, verbose_name="Scheduler's associated loading task id" ) class Meta: db_table = 'deposit' def __str__(self): d = { 'id': self.id, 'reception_date': self.reception_date, 'collection': self.collection.name, 'external_id': self.external_id, 'client': self.client.username, 'status': self.status, } if self.status in (DEPOSIT_STATUS_REJECTED): d['status_detail'] = self.status_detail return str(d) def client_directory_path(instance, filename): """Callable to upload archive in MEDIA_ROOT/user_/ Args: instance (DepositRequest): DepositRequest concerned by the upload filename (str): Filename of the uploaded file Returns: A path to be prefixed by the MEDIA_ROOT to access physically to the file uploaded. """ return 'client_{0}/{1}'.format(instance.deposit.client.id, filename) REQUEST_TYPES = [(ARCHIVE_TYPE, ARCHIVE_TYPE), (METADATA_TYPE, METADATA_TYPE)] class DepositRequest(models.Model): """Deposit request associated to one deposit. """ id = models.BigAutoField(primary_key=True) # Deposit concerned by the request deposit = models.ForeignKey(Deposit, models.DO_NOTHING) date = models.DateTimeField(auto_now_add=True) # Deposit request information on the data to inject # this can be null when type is 'archive' metadata = JSONField(null=True) raw_metadata = models.TextField(null=True) # this can be null when type is 'metadata' archive = models.FileField(null=True, upload_to=client_directory_path) type = models.CharField(max_length=8, choices=REQUEST_TYPES, null=True) class Meta: db_table = 'deposit_request' def __str__(self): meta = None if self.metadata: from json import dumps meta = dumps(self.metadata) archive_name = None if self.archive: archive_name = self.archive.name return str({ 'id': self.id, 'deposit': self.deposit, 'metadata': meta, 'archive': archive_name }) class DepositCollection(models.Model): id = models.BigAutoField(primary_key=True) # Human readable name for the collection type e.g HAL, arXiv, etc... name = models.TextField() class Meta: db_table = 'deposit_collection' def __str__(self): return str({'id': self.id, 'name': self.name}) diff --git a/swh/deposit/signals.py b/swh/deposit/signals.py index b861b37b..0d8e480d 100644 --- a/swh/deposit/signals.py +++ b/swh/deposit/signals.py @@ -1,109 +1,104 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of defining some uncoupled actions on deposit. Typically, checking that the archives deposited are ok are not directly testing in the request/answer to avoid too long computations. So this is done in the deposit_on_status_ready_for_check callback. """ +from swh.deposit import utils + from django.db.models.signals import post_save from django.dispatch import receiver from .models import Deposit from .config import SWHDefaultConfig, DEPOSIT_STATUS_VERIFIED from .config import DEPOSIT_STATUS_DEPOSITED def schedule_task(scheduler, task): """Schedule the task and return its identifier Args: task (dict): Task to schedule Returns: The task identifier """ tasks = scheduler.create_tasks([task]) if tasks: created_task = tasks[0] return created_task['id'] @receiver(post_save, sender=Deposit) def post_deposit_save(sender, instance, created, raw, using, update_fields, **kwargs): """When a deposit is saved, check for the deposit's status change and schedule actions accordingly. When the status passes to deposited, schedule checks. When the status pass to ready, schedule loading. Otherwise, do nothing. Args: sender (Deposit): The model class instance (Deposit): The actual instance being saved created (bool): True if a new record was created raw (bool): True if the model is saved exactly as presented (i.e. when loading a fixture). One should not query/modify other records in the database as the database might not be in a consistent state yet using: The database alias being used update_fields: The set of fields to update as passed to Model.save(), or None if update_fields wasn’t passed to save() """ default_config = SWHDefaultConfig() if not default_config.config['checks']: return if instance.status not in {DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_VERIFIED}: return from django.urls import reverse from swh.scheduler.utils import create_oneshot_task_dict args = [instance.collection.name, instance.id] # In the following, we are checking the instance.*task_id are not already # populated because the `instance.save()` call will also trigger a call to # that very function. if (instance.status == DEPOSIT_STATUS_DEPOSITED and not instance.check_task_id): # schedule deposit's checks from swh.deposit.config import PRIVATE_CHECK_DEPOSIT check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args) task = create_oneshot_task_dict('check-deposit', deposit_check_url=check_url) check_task_id = schedule_task(default_config.scheduler, task) instance.check_task_id = check_task_id instance.save() elif (instance.status == DEPOSIT_STATUS_VERIFIED and not instance.load_task_id): - # schedule deposit loading - from swh.deposit.config import PRIVATE_GET_RAW_CONTENT - from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA - from swh.deposit.config import PRIVATE_PUT_DEPOSIT - archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) - meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) - update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) - - task = create_oneshot_task_dict('load-deposit', - archive_url=archive_url, - deposit_meta_url=meta_url, - deposit_update_url=update_url) + + url = utils.origin_url_from(instance) + + task = create_oneshot_task_dict( + 'load-deposit', url=url, deposit_id=instance.id) load_task_id = schedule_task(default_config.scheduler, task) instance.load_task_id = load_task_id instance.save() diff --git a/swh/deposit/tests/api/test_deposit_check.py b/swh/deposit/tests/api/test_deposit_private_check.py similarity index 92% rename from swh/deposit/tests/api/test_deposit_check.py rename to swh/deposit/tests/api/test_deposit_private_check.py index 680cb034..86d1d607 100644 --- a/swh/deposit/tests/api/test_deposit_check.py +++ b/swh/deposit/tests/api/test_deposit_private_check.py @@ -1,234 +1,236 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from django.urls import reverse import pytest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.config import ( DEPOSIT_STATUS_VERIFIED, PRIVATE_CHECK_DEPOSIT, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED ) from swh.deposit.api.private.deposit_check import ( SWHChecksDeposit, MANDATORY_ARCHIVE_INVALID, MANDATORY_FIELDS_MISSING, MANDATORY_ARCHIVE_UNSUPPORTED, ALTERNATE_FIELDS_MISSING, MANDATORY_ARCHIVE_MISSING ) from swh.deposit.models import Deposit from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine @pytest.mark.fs class CheckDepositTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine): """Check deposit endpoints. """ def setUp(self): super().setUp() + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_CHECK_DEPOSIT, + args=[self.collection.name, deposit_id]) + def test_deposit_ok(self): """Proper deposit should succeed the checks (-> status ready) """ deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit_id = self.update_binary_deposit(deposit_id, status_partial=False) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) - url = reverse(PRIVATE_CHECK_DEPOSIT, - args=[self.collection.name, deposit.id]) - + url = self.private_deposit_url(deposit.id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_VERIFIED) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_VERIFIED) def test_deposit_invalid_tarball(self): """Deposit with tarball (of 1 tarball) should fail the checks: rejected """ for archive_extension in ['zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz']: deposit_id = self.create_deposit_archive_with_archive( archive_extension) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(DEPOSIT_STATUS_DEPOSITED, deposit.status) - url = reverse(PRIVATE_CHECK_DEPOSIT, - args=[self.collection.name, deposit.id]) - + url = self.private_deposit_url(deposit.id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) details = data['details'] # archive checks failure self.assertEqual(len(details['archive']), 1) self.assertEqual(details['archive'][0]['summary'], MANDATORY_ARCHIVE_INVALID) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_REJECTED) def test_deposit_ko_missing_tarball(self): """Deposit without archive should fail the checks: rejected """ deposit_id = self.create_deposit_ready() # no archive, only atom deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(DEPOSIT_STATUS_DEPOSITED, deposit.status) - url = reverse(PRIVATE_CHECK_DEPOSIT, - args=[self.collection.name, deposit.id]) - + url = self.private_deposit_url(deposit.id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) details = data['details'] # archive checks failure self.assertEqual(len(details['archive']), 1) self.assertEqual(details['archive'][0]['summary'], MANDATORY_ARCHIVE_MISSING) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_REJECTED) def test_deposit_ko_unsupported_tarball(self): """Deposit with an unsupported tarball should fail the checks: rejected """ deposit_id = self.create_deposit_with_invalid_archive() deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(DEPOSIT_STATUS_DEPOSITED, deposit.status) - url = reverse(PRIVATE_CHECK_DEPOSIT, - args=[self.collection.name, deposit.id]) - + url = self.private_deposit_url(deposit.id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) details = data['details'] # archive checks failure self.assertEqual(len(details['archive']), 1) self.assertEqual(details['archive'][0]['summary'], MANDATORY_ARCHIVE_UNSUPPORTED) # metadata check failure self.assertEqual(len(details['metadata']), 2) mandatory = details['metadata'][0] self.assertEqual(mandatory['summary'], MANDATORY_FIELDS_MISSING) self.assertEqual(set(mandatory['fields']), set(['author'])) alternate = details['metadata'][1] self.assertEqual(alternate['summary'], ALTERNATE_FIELDS_MISSING) self.assertEqual(alternate['fields'], ['name or title']) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_REJECTED) def test_check_deposit_metadata_ok(self): """Proper deposit should succeed the checks (-> status ready) with all **MUST** metadata using the codemeta metadata test set """ deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit_id_metadata = self.add_metadata_to_deposit(deposit_id) self.assertEqual(deposit_id, deposit_id_metadata) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) - url = reverse(PRIVATE_CHECK_DEPOSIT, - args=[self.collection.name, deposit.id]) + url = self.private_deposit_url(deposit.id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_VERIFIED) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_VERIFIED) +@pytest.mark.fs +class CheckDepositTest2(CheckDepositTest): + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_CHECK_DEPOSIT+'-nc', + args=[deposit_id]) + + class CheckMetadata(unittest.TestCase, SWHChecksDeposit): def test_check_metadata_ok(self): actual_check, detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'name': 'foo', 'author': 'someone', }) self.assertTrue(actual_check) self.assertIsNone(detail) def test_check_metadata_ok2(self): actual_check, detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'title': 'bar', 'author': 'someone', }) self.assertTrue(actual_check) self.assertIsNone(detail) def test_check_metadata_ko(self): """Missing optional field should be caught """ actual_check, error_detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'author': 'someone', }) expected_error = { 'metadata': [{ 'summary': 'Mandatory alternate fields are missing', 'fields': ['name or title'], }] } self.assertFalse(actual_check) self.assertEqual(error_detail, expected_error) def test_check_metadata_ko2(self): """Missing mandatory fields should be caught """ actual_check, error_detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'title': 'foobar', }) expected_error = { 'metadata': [{ 'summary': 'Mandatory fields are missing', 'fields': ['author'], }] } self.assertFalse(actual_check) self.assertEqual(error_detail, expected_error) diff --git a/swh/deposit/tests/api/test_deposit_read_archive.py b/swh/deposit/tests/api/test_deposit_private_read_archive.py similarity index 68% rename from swh/deposit/tests/api/test_deposit_read_archive.py rename to swh/deposit/tests/api/test_deposit_private_read_archive.py index 07d16c3f..b4ec2f41 100644 --- a/swh/deposit/tests/api/test_deposit_read_archive.py +++ b/swh/deposit/tests/api/test_deposit_private_read_archive.py @@ -1,125 +1,98 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib import os from django.urls import reverse import pytest from rest_framework import status from rest_framework.test import APITestCase from swh.core import tarball from swh.deposit.config import PRIVATE_GET_RAW_CONTENT from swh.deposit.tests import TEST_CONFIG from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine, create_arborescence_archive @pytest.mark.fs class DepositReadArchivesTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine): def setUp(self): super().setUp() self.archive2 = create_arborescence_archive( self.root_path, 'archive2', 'file2', b'some other content in file') self.workdir = os.path.join(self.root_path, 'workdir') + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_GET_RAW_CONTENT, + args=[self.collection.name, deposit_id]) + def test_access_to_existing_deposit_with_one_archive(self): """Access to deposit should stream a 200 response with its raw content """ deposit_id = self.create_simple_binary_deposit() - url = reverse(PRIVATE_GET_RAW_CONTENT, - args=[self.collection.name, deposit_id]) - + url = self.private_deposit_url(deposit_id) r = self.client.get(url) self.assertEqual(r.status_code, status.HTTP_200_OK) self.assertEqual(r._headers['content-type'][1], 'application/octet-stream') # read the stream data = b''.join(r.streaming_content) actual_sha1 = hashlib.sha1(data).hexdigest() self.assertEqual(actual_sha1, self.archive['sha1sum']) # this does not touch the extraction dir so this should stay empty self.assertEqual(os.listdir(TEST_CONFIG['extraction_dir']), []) def _check_tarball_consistency(self, actual_sha1): tarball.uncompress(self.archive['path'], self.workdir) self.assertEqual(os.listdir(self.workdir), ['file1']) tarball.uncompress(self.archive2['path'], self.workdir) lst = set(os.listdir(self.workdir)) self.assertEqual(lst, {'file1', 'file2'}) new_path = self.workdir + '.zip' tarball.compress(new_path, 'zip', self.workdir) with open(new_path, 'rb') as f: h = hashlib.sha1(f.read()).hexdigest() self.assertEqual(actual_sha1, h) self.assertNotEqual(actual_sha1, self.archive['sha1sum']) self.assertNotEqual(actual_sha1, self.archive2['sha1sum']) def test_access_to_existing_deposit_with_multiple_archives(self): """Access to deposit should stream a 200 response with its raw contents """ deposit_id = self.create_complex_binary_deposit() - - url = reverse(PRIVATE_GET_RAW_CONTENT, - args=[self.collection.name, deposit_id]) - + url = self.private_deposit_url(deposit_id) r = self.client.get(url) self.assertEqual(r.status_code, status.HTTP_200_OK) self.assertEqual(r._headers['content-type'][1], 'application/octet-stream') # read the stream data = b''.join(r.streaming_content) actual_sha1 = hashlib.sha1(data).hexdigest() self._check_tarball_consistency(actual_sha1) # this touches the extraction directory but should clean up # after itself self.assertEqual(os.listdir(TEST_CONFIG['extraction_dir']), []) -class DepositReadArchivesFailureTest(APITestCase, WithAuthTestCase, - BasicTestCase, CommonCreationRoutine): - def test_access_to_nonexisting_deposit_returns_404_response(self): - """Read unknown collection should return a 404 response - - """ - unknown_id = '999' - url = reverse(PRIVATE_GET_RAW_CONTENT, - args=[self.collection.name, unknown_id]) - - response = self.client.get(url) - self.assertEqual(response.status_code, - status.HTTP_404_NOT_FOUND) - self.assertIn('Deposit with id %s does not exist' % unknown_id, - response.content.decode('utf-8')) - - def test_access_to_nonexisting_collection_returns_404_response(self): - """Read unknown deposit should return a 404 response - - """ - collection_name = 'non-existing' - deposit_id = self.create_deposit_partial() - url = reverse(PRIVATE_GET_RAW_CONTENT, - args=[collection_name, deposit_id]) - - response = self.client.get(url) - self.assertEqual(response.status_code, - status.HTTP_404_NOT_FOUND) - self.assertIn('Unknown collection name %s' % collection_name, - response.content.decode('utf-8')) +@pytest.mark.fs +class DepositReadArchivesTest2(DepositReadArchivesTest): + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_GET_RAW_CONTENT+'-nc', args=[deposit_id]) diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py similarity index 94% rename from swh/deposit/tests/api/test_deposit_read_metadata.py rename to swh/deposit/tests/api/test_deposit_private_read_metadata.py index 0e7e38aa..e4110a5a 100644 --- a/swh/deposit/tests/api/test_deposit_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py @@ -1,661 +1,644 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.models import Deposit from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS from swh.deposit.config import DEPOSIT_STATUS_PARTIAL from ...config import SWH_PERSON from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine class DepositReadMetadataTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): """Deposit access to read metadata information on deposit. """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.template_metadata = """ Composing a Web of Audio Applications hal hal-01243065 hal-01243065 https://hal-test.archives-ouvertes.fr/hal-01243065 test DSP programming this is the description 1 phpstorm stable php python C GNU General Public License v3.0 only CeCILL Free Software License Agreement v1.1 HAL hal@ccsd.cnrs.fr Morane Gruenpeter %s """ + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_GET_DEPOSIT_METADATA, + args=[self.collection.name, deposit_id]) + def test_read_metadata(self): """Private metadata read api to existing deposit should return metadata """ deposit_id = self.create_deposit_partial() - url = reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[self.collection.name, deposit_id]) + url = self.private_deposit_url(deposit_id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response._headers['content-type'][1], 'application/json') data = response.json() expected_meta = { 'origin': { 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id', 'type': 'deposit' }, 'origin_metadata': { 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'provider': { 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'metadata': {} }, 'tool': { 'name': 'swh-deposit', 'version': '0.0.1', 'configuration': { 'sword_version': '2' } } }, 'revision': { 'synthetic': True, 'committer_date': { 'timestamp': { 'seconds': 1507389428, 'microseconds': 0 }, 'offset': 0, 'negative_utc': False }, 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'author': SWH_PERSON, 'committer': SWH_PERSON, 'date': { 'timestamp': { 'seconds': 1507389428, 'microseconds': 0 }, 'offset': 0, 'negative_utc': False }, 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], 'external_identifier': 'some-external-id', 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'type': 'tar' }, 'branch_name': 'master', } self.assertEqual(data, expected_meta) def test_read_metadata_revision_with_parent(self): """Private read metadata to a deposit (with parent) returns metadata """ swh_id = 'da78a9d4cf1d5d29873693fd496142e3a18c20fa' swh_persistent_id = 'swh:1:rev:%s' % swh_id deposit_id1 = self.create_deposit_with_status( status=DEPOSIT_STATUS_LOAD_SUCCESS, external_id='some-external-id', swh_id=swh_persistent_id) deposit_parent = Deposit.objects.get(pk=deposit_id1) self.assertEqual(deposit_parent.swh_id, swh_persistent_id) self.assertEqual(deposit_parent.external_id, 'some-external-id') self.assertEqual(deposit_parent.status, DEPOSIT_STATUS_LOAD_SUCCESS) deposit_id = self.create_deposit_partial( external_id='some-external-id') deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.external_id, 'some-external-id') self.assertEqual(deposit.swh_id, None) self.assertEqual(deposit.parent, deposit_parent) self.assertEqual(deposit.status, DEPOSIT_STATUS_PARTIAL) - url = reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[self.collection.name, deposit_id]) + url = self.private_deposit_url(deposit_id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response._headers['content-type'][1], 'application/json') data = response.json() expected_meta = { 'origin': { 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id', 'type': 'deposit' }, 'origin_metadata': { 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'provider': { 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'metadata': {} }, 'tool': { 'name': 'swh-deposit', 'version': '0.0.1', 'configuration': { 'sword_version': '2' } } }, 'revision': { 'synthetic': True, 'date': { 'timestamp': { 'seconds': 1507389428, 'microseconds': 0 }, 'offset': 0, 'negative_utc': False }, 'committer_date': { 'timestamp': { 'seconds': 1507389428, 'microseconds': 0 }, 'offset': 0, 'negative_utc': False }, 'author': SWH_PERSON, 'committer': SWH_PERSON, 'type': 'tar', 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'parents': [swh_id] }, 'branch_name': 'master', } self.assertEqual(data, expected_meta) def test_read_metadata_3(self): """date(Created|Published) provided, uses author/committer date """ # add metadata to the deposit with datePublished and dateCreated codemeta_entry_data = self.template_metadata % """ 2015-04-06T17:08:47+02:00 2017-05-03T16:08:47+02:00 """ deposit_id = self.create_deposit_partial_with_data_in_args( codemeta_entry_data) - - url = reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[self.collection.name, deposit_id]) - + url = self.private_deposit_url(deposit_id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response._headers['content-type'][1], 'application/json') data = response.json() expected_origin = { 'type': 'deposit', 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065' } expected_metadata = { '@xmlns': 'http://www.w3.org/2005/Atom', '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', 'author': { 'email': 'hal@ccsd.cnrs.fr', 'name': 'HAL' }, 'client': 'hal', 'codemeta:applicationCategory': 'test', 'codemeta:author': { 'codemeta:name': 'Morane Gruenpeter' }, 'codemeta:dateCreated': '2015-04-06T17:08:47+02:00', 'codemeta:datePublished': '2017-05-03T16:08:47+02:00', 'codemeta:description': 'this is the description', 'codemeta:developmentStatus': 'stable', 'codemeta:keywords': 'DSP programming', 'codemeta:license': [ { 'codemeta:name': 'GNU General Public License v3.0 only' }, { 'codemeta:name': 'CeCILL Free Software License Agreement v1.1' } ], 'codemeta:programmingLanguage': [ 'php', 'python', 'C' ], 'codemeta:runtimePlatform': 'phpstorm', 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa 'codemeta:version': '1', 'external_identifier': 'hal-01243065', 'id': 'hal-01243065', 'title': 'Composing a Web of Audio Applications' } expected_origin_metadata = { 'metadata': expected_metadata, 'provider': { 'metadata': {}, 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/' }, 'tool': { 'configuration': { 'sword_version': '2' }, 'name': 'swh-deposit', 'version': '0.0.1' } } expected_revision = { 'author': { 'email': 'robot@softwareheritage.org', 'fullname': 'Software Heritage', 'name': 'Software Heritage' }, 'committer': { 'email': 'robot@softwareheritage.org', 'fullname': 'Software Heritage', 'name': 'Software Heritage' }, 'committer_date': { 'negative_utc': False, 'offset': 120, 'timestamp': { 'microseconds': 0, 'seconds': 1493820527 } }, 'date': { 'negative_utc': False, 'offset': 120, 'timestamp': { 'microseconds': 0, 'seconds': 1428332927 } }, 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'metadata': expected_metadata, 'synthetic': True, 'type': 'tar' } expected_meta = { 'branch_name': 'master', 'origin': expected_origin, 'origin_metadata': expected_origin_metadata, 'revision': expected_revision, } self.assertEqual(data, expected_meta) def test_read_metadata_4(self): """dateCreated/datePublished not provided, revision uses complete_date """ codemeta_entry_data = self.template_metadata % '' deposit_id = self.create_deposit_partial_with_data_in_args( codemeta_entry_data) # will use the deposit completed date as fallback date deposit = Deposit.objects.get(pk=deposit_id) deposit.complete_date = '2016-04-06' deposit.save() - url = reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[self.collection.name, deposit_id]) - + url = self.private_deposit_url(deposit_id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response._headers['content-type'][1], 'application/json') data = response.json() expected_origin = { 'type': 'deposit', 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065' } expected_metadata = { '@xmlns': 'http://www.w3.org/2005/Atom', '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', 'author': { 'email': 'hal@ccsd.cnrs.fr', 'name': 'HAL' }, 'client': 'hal', 'codemeta:applicationCategory': 'test', 'codemeta:author': { 'codemeta:name': 'Morane Gruenpeter' }, 'codemeta:description': 'this is the description', 'codemeta:developmentStatus': 'stable', 'codemeta:keywords': 'DSP programming', 'codemeta:license': [ { 'codemeta:name': 'GNU General Public License v3.0 only' }, { 'codemeta:name': 'CeCILL Free Software License Agreement v1.1' } ], 'codemeta:programmingLanguage': [ 'php', 'python', 'C' ], 'codemeta:runtimePlatform': 'phpstorm', 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa 'codemeta:version': '1', 'external_identifier': 'hal-01243065', 'id': 'hal-01243065', 'title': 'Composing a Web of Audio Applications' } expected_origin_metadata = { 'metadata': expected_metadata, 'provider': { 'metadata': {}, 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/' }, 'tool': { 'configuration': { 'sword_version': '2' }, 'name': 'swh-deposit', 'version': '0.0.1' } } expected_revision = { 'author': { 'email': 'robot@softwareheritage.org', 'fullname': 'Software Heritage', 'name': 'Software Heritage' }, 'committer': { 'email': 'robot@softwareheritage.org', 'fullname': 'Software Heritage', 'name': 'Software Heritage' }, 'committer_date': { 'negative_utc': False, 'offset': 0, 'timestamp': { 'microseconds': 0, 'seconds': 1459900800 } }, 'date': { 'negative_utc': False, 'offset': 0, 'timestamp': { 'microseconds': 0, 'seconds': 1459900800 } }, 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'metadata': expected_metadata, 'synthetic': True, 'type': 'tar' } expected_meta = { 'branch_name': 'master', 'origin': expected_origin, 'origin_metadata': expected_origin_metadata, 'revision': expected_revision, } self.assertEqual(data, expected_meta) def test_read_metadata_5(self): """dateCreated/datePublished provided, revision uses author/committer date If multiple dateCreated provided, the first occurrence (of dateCreated) is selected. If multiple datePublished provided, the first occurrence (of datePublished) is selected. """ # add metadata to the deposit with multiple datePublished/dateCreated codemeta_entry_data = self.template_metadata % """ 2015-04-06T17:08:47+02:00 2017-05-03T16:08:47+02:00 2016-04-06T17:08:47+02:00 2018-05-03T16:08:47+02:00 """ deposit_id = self.create_deposit_partial_with_data_in_args( codemeta_entry_data) - - url = reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[self.collection.name, deposit_id]) - + url = self.private_deposit_url(deposit_id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response._headers['content-type'][1], 'application/json') data = response.json() expected_origin = { 'type': 'deposit', 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065' } expected_metadata = { '@xmlns': 'http://www.w3.org/2005/Atom', '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', 'author': { 'email': 'hal@ccsd.cnrs.fr', 'name': 'HAL' }, 'client': 'hal', 'codemeta:applicationCategory': 'test', 'codemeta:author': { 'codemeta:name': 'Morane Gruenpeter' }, 'codemeta:dateCreated': [ '2015-04-06T17:08:47+02:00', '2016-04-06T17:08:47+02:00', ], 'codemeta:datePublished': [ '2017-05-03T16:08:47+02:00', '2018-05-03T16:08:47+02:00', ], 'codemeta:description': 'this is the description', 'codemeta:developmentStatus': 'stable', 'codemeta:keywords': 'DSP programming', 'codemeta:license': [ { 'codemeta:name': 'GNU General Public License v3.0 only' }, { 'codemeta:name': 'CeCILL Free Software License Agreement v1.1' } ], 'codemeta:programmingLanguage': [ 'php', 'python', 'C' ], 'codemeta:runtimePlatform': 'phpstorm', 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa 'codemeta:version': '1', 'external_identifier': 'hal-01243065', 'id': 'hal-01243065', 'title': 'Composing a Web of Audio Applications' } expected_origin_metadata = { 'metadata': expected_metadata, 'provider': { 'metadata': {}, 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/' }, 'tool': { 'configuration': { 'sword_version': '2' }, 'name': 'swh-deposit', 'version': '0.0.1' } } expected_revision = { 'author': { 'email': 'robot@softwareheritage.org', 'fullname': 'Software Heritage', 'name': 'Software Heritage' }, 'committer': { 'email': 'robot@softwareheritage.org', 'fullname': 'Software Heritage', 'name': 'Software Heritage' }, 'committer_date': { 'negative_utc': False, 'offset': 120, 'timestamp': { 'microseconds': 0, 'seconds': 1493820527 } }, 'date': { 'negative_utc': False, 'offset': 120, 'timestamp': { 'microseconds': 0, 'seconds': 1428332927 } }, 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'metadata': expected_metadata, 'synthetic': True, 'type': 'tar' } expected_meta = { 'branch_name': 'master', 'origin': expected_origin, 'origin_metadata': expected_origin_metadata, 'revision': expected_revision, } self.assertEqual(data, expected_meta) def test_access_to_nonexisting_deposit_returns_404_response(self): """Read unknown collection should return a 404 response """ unknown_id = '999' - url = reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[self.collection.name, unknown_id]) - + url = self.private_deposit_url(unknown_id) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertIn('Deposit with id %s does not exist' % unknown_id, response.content.decode('utf-8')) - def test_access_to_nonexisting_collection_returns_404_response(self): - """Read unknown deposit should return a 404 response - """ - collection_name = 'non-existing' - deposit_id = self.create_deposit_partial() - url = reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[collection_name, deposit_id]) - - response = self.client.get(url) - self.assertEqual(response.status_code, - status.HTTP_404_NOT_FOUND) - self.assertIn('Unknown collection name %s' % collection_name, - response.content.decode('utf-8'),) +class DepositReadMetadataTest2(DepositReadMetadataTest): + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_GET_DEPOSIT_METADATA+'-nc', + args=[deposit_id]) diff --git a/swh/deposit/tests/api/test_deposit_update_status.py b/swh/deposit/tests/api/test_deposit_private_update_status.py similarity index 87% rename from swh/deposit/tests/api/test_deposit_update_status.py rename to swh/deposit/tests/api/test_deposit_private_update_status.py index d338ceab..7e6185f3 100644 --- a/swh/deposit/tests/api/test_deposit_update_status.py +++ b/swh/deposit/tests/api/test_deposit_private_update_status.py @@ -1,130 +1,134 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.models import Deposit, DEPOSIT_STATUS_DETAIL from swh.deposit.config import PRIVATE_PUT_DEPOSIT, DEPOSIT_STATUS_VERIFIED from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS from ..common import BasicTestCase class UpdateDepositStatusTest(APITestCase, BasicTestCase): """Update the deposit's status scenario """ def setUp(self): super().setUp() deposit = Deposit(status=DEPOSIT_STATUS_VERIFIED, collection=self.collection, client=self.user) deposit.save() self.deposit = Deposit.objects.get(pk=deposit.id) assert self.deposit.status == DEPOSIT_STATUS_VERIFIED + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_PUT_DEPOSIT, + args=[self.collection.name, deposit_id]) + def test_update_deposit_status(self): """Existing status for update should return a 204 response """ - url = reverse(PRIVATE_PUT_DEPOSIT, - args=[self.collection.name, self.deposit.id]) + url = self.private_deposit_url(self.deposit.id) possible_status = set(DEPOSIT_STATUS_DETAIL.keys()) - set( [DEPOSIT_STATUS_LOAD_SUCCESS]) for _status in possible_status: response = self.client.put( url, content_type='application/json', data=json.dumps({'status': _status})) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) deposit = Deposit.objects.get(pk=self.deposit.id) self.assertEqual(deposit.status, _status) def test_update_deposit_status_with_info(self): """Existing status for update with info should return a 204 response """ - url = reverse(PRIVATE_PUT_DEPOSIT, - args=[self.collection.name, self.deposit.id]) + url = self.private_deposit_url(self.deposit.id) expected_status = DEPOSIT_STATUS_LOAD_SUCCESS origin_url = 'something' directory_id = '42a13fc721c8716ff695d0d62fc851d641f3a12b' revision_id = '47dc6b4636c7f6cba0df83e3d5490bf4334d987e' expected_swh_id = 'swh:1:dir:%s' % directory_id expected_swh_id_context = 'swh:1:dir:%s;origin=%s' % ( directory_id, origin_url) expected_swh_anchor_id = 'swh:1:rev:%s' % revision_id expected_swh_anchor_id_context = 'swh:1:rev:%s;origin=%s' % ( revision_id, origin_url) response = self.client.put( url, content_type='application/json', data=json.dumps({ 'status': expected_status, 'revision_id': revision_id, 'directory_id': directory_id, 'origin_url': origin_url, })) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) deposit = Deposit.objects.get(pk=self.deposit.id) self.assertEqual(deposit.status, expected_status) self.assertEqual(deposit.swh_id, expected_swh_id) self.assertEqual(deposit.swh_id_context, expected_swh_id_context) self.assertEqual(deposit.swh_anchor_id, expected_swh_anchor_id) self.assertEqual(deposit.swh_anchor_id_context, expected_swh_anchor_id_context) def test_update_deposit_status_will_fail_with_unknown_status(self): """Unknown status for update should return a 400 response """ - url = reverse(PRIVATE_PUT_DEPOSIT, - args=[self.collection.name, self.deposit.id]) + url = self.private_deposit_url(self.deposit.id) response = self.client.put( url, content_type='application/json', data=json.dumps({'status': 'unknown'})) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) def test_update_deposit_status_will_fail_with_no_status_key(self): """No status provided for update should return a 400 response """ - url = reverse(PRIVATE_PUT_DEPOSIT, - args=[self.collection.name, self.deposit.id]) + url = self.private_deposit_url(self.deposit.id) response = self.client.put( url, content_type='application/json', data=json.dumps({'something': 'something'})) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) def test_update_deposit_status_success_without_swh_id_fail(self): """Providing successful status without swh_id should return a 400 """ - url = reverse(PRIVATE_PUT_DEPOSIT, - args=[self.collection.name, self.deposit.id]) + url = self.private_deposit_url(self.deposit.id) response = self.client.put( url, content_type='application/json', data=json.dumps({'status': DEPOSIT_STATUS_LOAD_SUCCESS})) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + +class UpdateDepositStatusTest2(UpdateDepositStatusTest): + def private_deposit_url(self, deposit_id): + return reverse(PRIVATE_PUT_DEPOSIT+'-nc', args=[deposit_id]) diff --git a/version.txt b/version.txt index 3730c4b0..c23a90be 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.73-0-ge2ef7bc \ No newline at end of file +v0.0.74-0-g12e3966 \ No newline at end of file