diff --git a/MANIFEST.in b/MANIFEST.in index 6c3de5d3..1546d7f9 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,11 +1,8 @@ include Makefile -include requirements.txt -include requirements-test.txt -include requirements-swh.txt -include requirements-server.txt -include requirements-swh-server.txt +include requirements*.txt include version.txt recursive-include swh/deposit/static * recursive-include swh/deposit/fixtures * recursive-include swh/deposit/templates * recursive-include swh/deposit/tests/*/data * +recursive-include swh py.typed diff --git a/PKG-INFO b/PKG-INFO index 7fab7fcb..18c86c1e 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,35 +1,35 @@ Metadata-Version: 2.1 Name: swh.deposit -Version: 0.0.75 +Version: 0.0.76 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Description: # swh-deposit This is [Software Heritage](https://www.softwareheritage.org)'s [SWORD 2.0](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) Server implementation, as well as a simple client to upload deposits on the server. **S.W.O.R.D** (**S**imple **W**eb-Service **O**ffering **R**epository **D**eposit) is an interoperability standard for digital file deposit. This implementation will permit interaction between a client (a repository) and a server (SWH repository) to permit deposits of software source code archives and associated metadata. 
The documentation is at ./docs/README-specification.md Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: server diff --git a/requirements-test.txt b/requirements-test.txt index 91ffadc6..b5e4fdab 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,5 +1,6 @@ -pytest<4 +pytest pytest-django swh.scheduler[testing] pytest-postgresql >= 2.1.0 requests_mock +django-stubs diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO index 7fab7fcb..18c86c1e 100644 --- a/swh.deposit.egg-info/PKG-INFO +++ b/swh.deposit.egg-info/PKG-INFO @@ -1,35 +1,35 @@ Metadata-Version: 2.1 Name: swh.deposit -Version: 0.0.75 +Version: 0.0.76 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Description: # swh-deposit This is [Software Heritage](https://www.softwareheritage.org)'s [SWORD 2.0](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) Server implementation, as well as a simple client to upload deposits on the server. **S.W.O.R.D** (**S**imple **W**eb-Service **O**ffering **R**epository **D**eposit) is an interoperability standard for digital file deposit. This implementation will permit interaction between a client (a repository) and a server (SWH repository) to permit deposits of software source code archives and associated metadata. 
The documentation is at ./docs/README-specification.md Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: server diff --git a/swh.deposit.egg-info/SOURCES.txt b/swh.deposit.egg-info/SOURCES.txt index 23144468..5ec1f4a2 100644 --- a/swh.deposit.egg-info/SOURCES.txt +++ b/swh.deposit.egg-info/SOURCES.txt @@ -1,152 +1,153 @@ MANIFEST.in Makefile README.md requirements-server.txt requirements-swh-server.txt requirements-swh.txt requirements-test.txt requirements.txt setup.py version.txt swh/__init__.py swh.deposit.egg-info/PKG-INFO swh.deposit.egg-info/SOURCES.txt swh.deposit.egg-info/dependency_links.txt swh.deposit.egg-info/entry_points.txt swh.deposit.egg-info/requires.txt swh.deposit.egg-info/top_level.txt swh/deposit/__init__.py swh/deposit/apps.py swh/deposit/auth.py swh/deposit/config.py swh/deposit/errors.py swh/deposit/manage.py swh/deposit/models.py swh/deposit/parsers.py +swh/deposit/py.typed swh/deposit/signals.py swh/deposit/urls.py swh/deposit/utils.py swh/deposit/api/__init__.py swh/deposit/api/common.py swh/deposit/api/converters.py swh/deposit/api/deposit.py swh/deposit/api/deposit_content.py swh/deposit/api/deposit_status.py swh/deposit/api/deposit_update.py swh/deposit/api/service_document.py swh/deposit/api/urls.py swh/deposit/api/private/__init__.py swh/deposit/api/private/deposit_check.py swh/deposit/api/private/deposit_list.py swh/deposit/api/private/deposit_read.py swh/deposit/api/private/deposit_update_status.py swh/deposit/api/private/urls.py swh/deposit/cli/__init__.py swh/deposit/cli/admin.py swh/deposit/cli/client.py swh/deposit/client/__init__.py swh/deposit/fixtures/__init__.py swh/deposit/fixtures/deposit_data.yaml 
swh/deposit/loader/__init__.py swh/deposit/loader/checker.py swh/deposit/loader/loader.py swh/deposit/loader/tasks.py swh/deposit/migrations/0001_initial.py swh/deposit/migrations/0002_depositrequest_archive.py swh/deposit/migrations/0003_temporaryarchive.py swh/deposit/migrations/0004_delete_temporaryarchive.py swh/deposit/migrations/0005_auto_20171019_1436.py swh/deposit/migrations/0006_depositclient_url.py swh/deposit/migrations/0007_auto_20171129_1609.py swh/deposit/migrations/0008_auto_20171130_1513.py swh/deposit/migrations/0009_deposit_parent.py swh/deposit/migrations/0010_auto_20180110_0953.py swh/deposit/migrations/0011_auto_20180115_1510.py swh/deposit/migrations/0012_deposit_status_detail.py swh/deposit/migrations/0013_depositrequest_raw_metadata.py swh/deposit/migrations/0014_auto_20180720_1221.py swh/deposit/migrations/0015_depositrequest_typemigration.py swh/deposit/migrations/0016_auto_20190507_1408.py swh/deposit/migrations/0017_auto_20190925_0906.py swh/deposit/migrations/__init__.py swh/deposit/settings/__init__.py swh/deposit/settings/common.py swh/deposit/settings/development.py swh/deposit/settings/production.py swh/deposit/settings/testing.py swh/deposit/static/robots.txt swh/deposit/static/css/bootstrap-responsive.min.css swh/deposit/static/css/style.css swh/deposit/static/img/arrow-up-small.png swh/deposit/static/img/swh-logo-deposit.png swh/deposit/static/img/swh-logo-deposit.svg swh/deposit/static/img/icons/swh-logo-32x32.png swh/deposit/static/img/icons/swh-logo-deposit-180x180.png swh/deposit/static/img/icons/swh-logo-deposit-192x192.png swh/deposit/static/img/icons/swh-logo-deposit-270x270.png swh/deposit/templates/__init__.py swh/deposit/templates/homepage.html swh/deposit/templates/layout.html swh/deposit/templates/deposit/__init__.py swh/deposit/templates/deposit/content.xml swh/deposit/templates/deposit/deposit_receipt.xml swh/deposit/templates/deposit/error.xml swh/deposit/templates/deposit/service_document.xml 
swh/deposit/templates/deposit/status.xml swh/deposit/templates/rest_framework/api.html swh/deposit/tests/__init__.py swh/deposit/tests/common.py swh/deposit/tests/conftest.py swh/deposit/tests/test_common.py swh/deposit/tests/test_utils.py swh/deposit/tests/api/__init__.py swh/deposit/tests/api/conftest.py swh/deposit/tests/api/test_converters.py swh/deposit/tests/api/test_deposit.py swh/deposit/tests/api/test_deposit_atom.py swh/deposit/tests/api/test_deposit_binary.py swh/deposit/tests/api/test_deposit_delete.py swh/deposit/tests/api/test_deposit_list.py swh/deposit/tests/api/test_deposit_multipart.py swh/deposit/tests/api/test_deposit_private_check.py swh/deposit/tests/api/test_deposit_private_read_archive.py swh/deposit/tests/api/test_deposit_private_read_metadata.py swh/deposit/tests/api/test_deposit_private_update_status.py swh/deposit/tests/api/test_deposit_status.py swh/deposit/tests/api/test_deposit_update.py swh/deposit/tests/api/test_parser.py swh/deposit/tests/api/test_service_document.py swh/deposit/tests/api/data/atom/codemeta-sample.xml swh/deposit/tests/api/data/atom/entry-data-badly-formatted.xml swh/deposit/tests/api/data/atom/entry-data-deposit-binary.xml swh/deposit/tests/api/data/atom/entry-data-empty-body.xml swh/deposit/tests/api/data/atom/entry-data-ko.xml swh/deposit/tests/api/data/atom/entry-data-minimal.xml swh/deposit/tests/api/data/atom/entry-data-parsing-error-prone.xml swh/deposit/tests/api/data/atom/entry-data0.xml swh/deposit/tests/api/data/atom/entry-data1.xml swh/deposit/tests/api/data/atom/entry-data2.xml swh/deposit/tests/api/data/atom/entry-data3.xml swh/deposit/tests/api/data/atom/entry-update-in-place.xml swh/deposit/tests/api/data/atom/error-with-decimal.xml swh/deposit/tests/api/data/atom/metadata.xml swh/deposit/tests/api/data/atom/tei-sample.xml swh/deposit/tests/cli/__init__.py swh/deposit/tests/cli/test_client.py swh/deposit/tests/loader/__init__.py swh/deposit/tests/loader/common.py swh/deposit/tests/loader/conftest.py 
swh/deposit/tests/loader/test_checker.py swh/deposit/tests/loader/test_client.py swh/deposit/tests/loader/test_loader.py swh/deposit/tests/loader/test_tasks.py swh/deposit/tests/loader/data/http_example.org/hello.json swh/deposit/tests/loader/data/http_example.org/hello_you swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_1_check swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_2_check swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_meta swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_raw swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_update swh/deposit/tests/loader/data/https_nowhere.org/1_private_test_1_check swh/deposit/tests/loader/data/https_nowhere.org/1_private_test_1_metadata swh/deposit/tests/loader/data/https_nowhere.org/1_private_test_1_raw \ No newline at end of file diff --git a/swh.deposit.egg-info/requires.txt b/swh.deposit.egg-info/requires.txt index 49d935de..c6857e04 100644 --- a/swh.deposit.egg-info/requires.txt +++ b/swh.deposit.egg-info/requires.txt @@ -1,29 +1,30 @@ vcversioner click xmltodict iso8601 requests swh.core>=0.0.75 [server] Django<2.0 djangorestframework swh.core[http] swh.loader.tar>=0.0.39 swh.loader.core>=0.0.43 swh.scheduler>=0.0.39 swh.model>=0.0.26 [testing] -pytest<4 +pytest pytest-django swh.scheduler[testing] pytest-postgresql>=2.1.0 requests_mock +django-stubs Django<2.0 djangorestframework swh.core[http] swh.loader.tar>=0.0.39 swh.loader.core>=0.0.43 swh.scheduler>=0.0.39 swh.model>=0.0.26 diff --git a/swh/__init__.py b/swh/__init__.py index 69e3be50..f14e1965 100644 --- a/swh/__init__.py +++ b/swh/__init__.py @@ -1 +1,4 @@ -__path__ = __import__('pkgutil').extend_path(__path__, __name__) +from pkgutil import extend_path +from typing import Iterable + +__path__ = extend_path(__path__, __name__) # type: Iterable[str] diff --git 
a/swh/deposit/api/common.py b/swh/deposit/api/common.py index c5cc631c..f867d5b4 100644 --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,884 +1,886 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib +from typing import Any, Tuple + from abc import ABCMeta, abstractmethod from django.urls import reverse from django.http import HttpResponse from django.shortcuts import render from django.utils import timezone from rest_framework import status from rest_framework.authentication import BasicAuthentication from rest_framework.permissions import IsAuthenticated from rest_framework.views import APIView from swh.model import hashutil from ..config import ( SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI, ARCHIVE_KEY, METADATA_KEY, RAW_METADATA_KEY, STATE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS, ARCHIVE_TYPE, METADATA_TYPE ) from ..errors import ( MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT, CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED, make_error_response_from_dict, FORBIDDEN, NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED, ParserError, PARSING_ERROR ) from ..models import ( Deposit, DepositRequest, DepositCollection, DepositClient ) from ..parsers import parse_xml ACCEPT_PACKAGINGS = ['http://purl.org/net/sword/package/SimpleZip'] ACCEPT_ARCHIVE_CONTENT_TYPES = ['application/zip', 'application/x-tar'] class SWHAPIView(APIView): """Mixin intended as a based API view to enforce the basic authentication check """ - authentication_classes = (BasicAuthentication, ) + authentication_classes = (BasicAuthentication, ) # type: Tuple[Any, ...] 
permission_classes = (IsAuthenticated, ) class SWHBaseDeposit(SWHDefaultConfig, SWHAPIView, metaclass=ABCMeta): """Base deposit request class sharing multiple common behaviors. """ def _read_headers(self, req): """Read and unify the necessary headers from the request (those are not stored in the same location or not properly formatted). Args: req (Request): Input request Returns: Dictionary with the following keys (some associated values may be None): - content-type - content-length - in-progress - content-disposition - packaging - slug - on-behalf-of """ meta = req._request.META content_type = req.content_type content_length = meta.get('CONTENT_LENGTH') if content_length and isinstance(content_length, str): content_length = int(content_length) # final deposit if not provided in_progress = meta.get('HTTP_IN_PROGRESS', False) content_disposition = meta.get('HTTP_CONTENT_DISPOSITION') if isinstance(in_progress, str): in_progress = in_progress.lower() == 'true' content_md5sum = meta.get('HTTP_CONTENT_MD5') if content_md5sum: content_md5sum = bytes.fromhex(content_md5sum) packaging = meta.get('HTTP_PACKAGING') slug = meta.get('HTTP_SLUG') on_behalf_of = meta.get('HTTP_ON_BEHALF_OF') metadata_relevant = meta.get('HTTP_METADATA_RELEVANT') return { 'content-type': content_type, 'content-length': content_length, 'in-progress': in_progress, 'content-disposition': content_disposition, 'content-md5sum': content_md5sum, 'packaging': packaging, 'slug': slug, 'on-behalf-of': on_behalf_of, 'metadata-relevant': metadata_relevant, } def _compute_md5(self, filehandler): """Compute uploaded file's md5 sum. Args: filehandler (InMemoryUploadedFile): the file to compute the md5 hash Returns: the md5 checksum (str) """ h = hashlib.md5() for chunk in filehandler: h.update(chunk) return h.digest() def _deposit_put(self, deposit_id=None, in_progress=False, external_id=None): """Save/Update a deposit in db. 
Args: deposit_id (int): deposit identifier in_progress (dict): The deposit's status external_id (str): The external identifier to associate to the deposit Returns: The Deposit instance saved or updated. """ if in_progress is False: complete_date = timezone.now() status_type = DEPOSIT_STATUS_DEPOSITED else: complete_date = None status_type = DEPOSIT_STATUS_PARTIAL if not deposit_id: try: # find a deposit parent (same external id, status load # to success) deposit_parent = Deposit.objects.filter( external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS).order_by('-id')[0:1].get() # noqa except Deposit.DoesNotExist: deposit_parent = None deposit = Deposit(collection=self._collection, external_id=external_id, complete_date=complete_date, status=status_type, client=self._client, parent=deposit_parent) else: deposit = Deposit.objects.get(pk=deposit_id) # update metadata deposit.complete_date = complete_date deposit.status = status_type deposit.save() return deposit def _deposit_request_put(self, deposit, deposit_request_data, replace_metadata=False, replace_archives=False): """Save a deposit request with metadata attached to a deposit. 
def _delete_archives(self, collection_name, deposit_id):
    """Delete the archive requests attached to a deposit.

    Args:
        collection_name (str): Name of the collection the request
            was addressed to
        deposit_id (id): The deposit whose archives must be removed

    Returns
        Empty dict when ok.
        Dict with error key to describe the failure (unknown
        deposit, or deposit belonging to another collection).

    """
    try:
        deposit = Deposit.objects.get(pk=deposit_id)
    except Deposit.DoesNotExist:
        return make_error_dict(
            NOT_FOUND,
            'The deposit %s does not exist' % deposit_id)

    # Same ownership guard as _delete_deposit: without it, a request
    # addressed to one collection could wipe the archives of a deposit
    # living in another one.
    if deposit.collection.name != collection_name:
        summary = ('Cannot delete archives of a deposit from another '
                   'collection')
        description = "Deposit %s does not belong to the collection %s" % (
            deposit_id, collection_name)
        return make_error_dict(
            BAD_REQUEST,
            summary=summary,
            verbose_description=description)

    DepositRequest.objects.filter(
        deposit=deposit,
        type=ARCHIVE_TYPE).delete()

    return {}
""" try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'The deposit %s does not exist' % deposit_id) if deposit.collection.name != collection_name: summary = 'Cannot delete a deposit from another collection' description = "Deposit %s does not belong to the collection %s" % ( deposit_id, collection_name) return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description) DepositRequest.objects.filter(deposit=deposit).delete() deposit.delete() return {} def _check_preconditions_on(self, filehandler, md5sum, content_length=None): """Check preconditions on provided file are respected. That is the length and/or the md5sum hash match the file's content. Args: filehandler (InMemoryUploadedFile): The file to check md5sum (hex str): md5 hash expected from the file's content content_length (int): the expected length if provided. Returns: Either none if no error or a dictionary with a key error detailing the problem. """ if content_length: if content_length > self.config['max_upload_size']: return make_error_dict( MAX_UPLOAD_SIZE_EXCEEDED, 'Upload size limit exceeded (max %s bytes).' % self.config['max_upload_size'], 'Please consider sending the archive in ' 'multiple steps.') length = filehandler.size if length != content_length: return make_error_dict(status.HTTP_412_PRECONDITION_FAILED, 'Wrong length') if md5sum: _md5sum = self._compute_md5(filehandler) if _md5sum != md5sum: return make_error_dict( CHECKSUM_MISMATCH, 'Wrong md5 hash', 'The checksum sent %s and the actual checksum ' '%s does not match.' % (hashutil.hash_to_hex(md5sum), hashutil.hash_to_hex(_md5sum))) return None def _binary_upload(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Binary upload routine. Other than such a request, a 415 response is returned. 
Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 413 (request entity too large) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided """ content_length = headers['content-length'] if not content_length: return make_error_dict( BAD_REQUEST, 'CONTENT_LENGTH header is mandatory', 'For archive deposit, the ' 'CONTENT_LENGTH header must be sent.') content_disposition = headers['content-disposition'] if not content_disposition: return make_error_dict( BAD_REQUEST, 'CONTENT_DISPOSITION header is mandatory', 'For archive deposit, the ' 'CONTENT_DISPOSITION header must be sent.') packaging = headers['packaging'] if packaging and packaging not in ACCEPT_PACKAGINGS: return make_error_dict( BAD_REQUEST, 'Only packaging %s is supported' % ACCEPT_PACKAGINGS, 'The packaging provided %s is not supported' % packaging) filehandler = req.FILES['file'] precondition_status_response = self._check_preconditions_on( filehandler, 
def _multipart_upload(self, req, headers, collection_name,
                      deposit_id=None, replace_metadata=False,
                      replace_archives=False):
    """Multipart upload supported with exactly:
    - 1 archive (application/zip or application/x-tar)
    - 1 atom entry (application/atom+xml)

    Any other combination of parts is rejected with an error
    dictionary instead of being processed.

    Args:
        req (Request): the request holding information to parse
            and inject in db
        headers (dict): request headers formatted
        collection_name (str): the associated client
        deposit_id (id): deposit identifier if provided
        replace_metadata (bool): 'Update or add' request to existing
          deposit. If False (default), this adds new metadata request to
          existing ones. Otherwise, this will replace existing metadata.
        replace_archives (bool): 'Update or add' request to existing
          deposit. If False (default), this adds new archive request to
          existing ones. Otherwise, this will replace existing archives.

    Returns:
        In the optimal case a dict with the following keys:
            - deposit_id (int): Deposit identifier
            - deposit_date (date): Deposit date
            - archive (str): Name of the uploaded archive
            - status (str): Status of the deposit

        Otherwise, a dictionary with the key error describing the
        failure, either:
            - ERROR_CONTENT if the parts are not exactly one archive
              and one atom entry
            - whatever _check_preconditions_on reports for the
              archive (md5 mismatch)
            - PARSING_ERROR if the atom entry is malformed

    """
    external_id = headers['slug']

    content_types_present = set()

    data = {
        'application/zip': None,  # expected either zip
        'application/x-tar': None,  # or x-tar
        'application/atom+xml': None,
    }
    for _field_name, fh in req.FILES.items():
        if fh.content_type in content_types_present:
            return make_error_dict(
                ERROR_CONTENT,
                'Only 1 application/zip (or application/x-tar) archive '
                'and 1 atom+xml entry is supported (as per sword2.0 '
                'specification)',
                'You provided more than 1 application/(zip|x-tar) '
                'or more than 1 application/atom+xml content-disposition '
                'header in the multipart deposit')

        content_types_present.add(fh.content_type)
        data[fh.content_type] = fh

    filehandler = data['application/zip']
    if not filehandler:
        filehandler = data['application/x-tar']
    atom_entry = data['application/atom+xml']

    # Counting the parts is not enough: two parts of unexpected types
    # (or zip + x-tar without atom entry) also add up to 2. Make sure
    # both the archive and the atom entry are really there before
    # dereferencing them.
    if (len(content_types_present) != 2
            or filehandler is None or atom_entry is None):
        return make_error_dict(
            ERROR_CONTENT,
            'You must provide both 1 application/zip (or '
            'application/x-tar) and 1 atom+xml entry for multipart '
            'deposit',
            'You need to provide only 1 application/(zip|x-tar) '
            'and 1 application/atom+xml content-disposition header '
            'in the multipart deposit')

    precondition_status_response = self._check_preconditions_on(
        filehandler,
        headers['content-md5sum'])

    if precondition_status_response:
        return precondition_status_response

    try:
        raw_metadata, metadata = self._read_metadata(atom_entry)
    except ParserError:
        return make_error_dict(
            PARSING_ERROR,
            'Malformed xml metadata',
            "The xml received is malformed. "
            "Please ensure your metadata file is correctly formatted.")

    # actual storage of data
    deposit = self._deposit_put(deposit_id=deposit_id,
                                in_progress=headers['in-progress'],
                                external_id=external_id)
    deposit_request_data = {
        ARCHIVE_KEY: filehandler,
        METADATA_KEY: metadata,
        RAW_METADATA_KEY: raw_metadata,
    }
    self._deposit_request_put(
        deposit, deposit_request_data, replace_metadata, replace_archives)

    return {
        'deposit_id': deposit.id,
        'deposit_date': deposit.reception_date,
        'archive': filehandler.name,
        'status': deposit.status,
    }
def _make_iris(self, req, collection_name, deposit_id):
    """Build the absolute urls of the deposit's IRI endpoints.

    Args:
        req (Request): The initial request
        collection_name (str): client/collection's name
        deposit_id (id): Deposit identifier

    Returns:
        Dictionary mapping each of EM_IRI, EDIT_SE_IRI, CONT_FILE_IRI
        and STATE_IRI to its absolute url.

    """
    url_args = [collection_name, deposit_id]
    iris = {}
    for iri_name in (EM_IRI, EDIT_SE_IRI, CONT_FILE_IRI, STATE_IRI):
        relative_url = reverse(iri_name, args=url_args)
        iris[iri_name] = req.build_absolute_uri(relative_url)
    return iris
def restrict_access(self, req, deposit=None):
    """Refuse non-GET requests on a deposit which is no longer partial.

    Args:
        req (Request): The request to vet
        deposit (Deposit): The targeted deposit, if any

    Returns:
        None when the request may proceed, otherwise the error
        dictionary built by make_error_dict.

    """
    if not deposit:
        return None

    # reading is always allowed
    if req.method == 'GET':
        return None

    if deposit.status == DEPOSIT_STATUS_PARTIAL:
        return None

    return make_error_dict(
        BAD_REQUEST,
        summary="You can only act on deposit with status '%s'" % (
            DEPOSIT_STATUS_PARTIAL, ),
        verbose_description="This deposit has status '%s'" % (
            deposit.status))
class SWHPostDepositAPI(SWHBaseDeposit, metaclass=ABCMeta):
    """Mixin for class to support POST method.

    """
    def post(self, req, collection_name, deposit_id=None, format=None):
        """Endpoint to create/add resources to deposit.

        Args:
            req (Request): The incoming request
            collection_name (str): client/collection's name
            deposit_id (id): Deposit identifier, if the deposit exists
            format: unused here

        Returns:
            The rendered deposit receipt, with the status code chosen
            by process_post and a Location header pointing to the iri
            whose key process_post returned.
            An error response when self.checks or process_post
            report an error.

        """
        checks = self.checks(req, collection_name, deposit_id)
        if 'error' in checks:
            return make_error_response_from_dict(req, checks['error'])

        headers = checks['headers']
        _status, _iri_key, data = self.process_post(
            req, headers, collection_name, deposit_id)

        error = data.get('error')
        if error:
            return make_error_response_from_dict(req, error)

        # enrich the processing result with what the receipt template
        # needs: accepted packagings and the deposit's iris
        data['packagings'] = ACCEPT_PACKAGINGS
        iris = self._make_iris(req, collection_name, data['deposit_id'])
        data.update(iris)
        response = render(req, 'deposit/deposit_receipt.xml',
                          context=data,
                          content_type='application/xml',
                          status=_status)
        # Set the header through the response's public item API rather
        # than its private _headers mapping: the value gets validated
        # and this does not depend on the response's internals.
        response['Location'] = data[_iri_key]
        return response

    @abstractmethod
    def process_post(self, req, headers, collection_name, deposit_id=None):
        """Routine to deal with the deposit's processing.

        Returns
            Tuple of:
            - response status code (200, 201, etc...)
            - key iri (EM_IRI, EDIT_SE_IRI, etc...)
            - dictionary of the processing result

        """
        pass
Returns dictionary of the processing result """ pass class SWHDeleteDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def delete(self, req, collection_name, deposit_id): """Endpoint to delete some deposit's resources (archives, deposit). Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) data = self.process_delete(req, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_delete(self, req, collection_name, deposit_id): """Routine to delete a resource. This is mostly not allowed except for the EM_IRI (cf. .api.deposit_update.SWHUpdateArchiveDeposit) """ pass diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py index d257572c..4ed1154d 100644 --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -1,92 +1,94 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.deposit import utils from ...config import METADATA_TYPE from ...models import DepositRequest, Deposit from rest_framework.permissions import AllowAny from swh.deposit.api.common import SWHAPIView from swh.deposit.errors import make_error_dict, NOT_FOUND class DepositReadMixin: """Deposit Read mixin """ def _deposit_requests(self, deposit, request_type): """Given a deposit, yields its associated deposit_request Args: deposit (Deposit): Deposit to list requests for request_type (str): 'archive' or 
'metadata' Yields: deposit requests of type request_type associated to the deposit """ if isinstance(deposit, int): deposit = Deposit.objects.get(pk=deposit) deposit_requests = DepositRequest.objects.filter( type=request_type, deposit=deposit).order_by('id') for deposit_request in deposit_requests: yield deposit_request def _metadata_get(self, deposit): """Given a deposit, aggregate all metadata requests. Args: deposit (Deposit): The deposit instance to extract metadata from. Returns: metadata dict from the deposit. """ metadata = (m.metadata for m in self._deposit_requests( deposit, request_type=METADATA_TYPE)) return utils.merge(*metadata) class SWHPrivateAPIView(SWHAPIView): """Mixin intended as private api (so no authentication) based API view (for the private ones). """ authentication_classes = () permission_classes = (AllowAny, ) def checks(self, req, collection_name, deposit_id=None): """Override default checks implementation to allow empty collection. """ if deposit_id: try: Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'Deposit with id %s does not exist' % deposit_id) headers = self._read_headers(req) checks = self.additional_checks( req, headers, collection_name, deposit_id) if 'error' in checks: return checks return {'headers': headers} - def get(self, req, collection_name=None, deposit_id=None, format=None): + def get(self, req, collection_name=None, deposit_id=None, format=None, + *args, **kwargs): return super().get(req, collection_name, deposit_id, format) - def put(self, req, collection_name=None, deposit_id=None, format=None): + def put(self, req, collection_name=None, deposit_id=None, format=None, + *args, **kwargs): return super().put(req, collection_name, deposit_id, format) diff --git a/swh/deposit/config.py b/swh/deposit/config.py index eab7deea..b7a1b6a0 100644 --- a/swh/deposit/config.py +++ b/swh/deposit/config.py @@ -1,109 +1,111 @@ # Copyright (C) 2017-2018 The Software Heritage 
developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import logging +from typing import Any, Dict, Tuple + from swh.core.config import SWHConfig from swh.scheduler import get_scheduler # IRIs (Internationalized Resource identifier) sword 2.0 specified EDIT_SE_IRI = 'edit_se_iri' EM_IRI = 'em_iri' CONT_FILE_IRI = 'cont_file_iri' SD_IRI = 'servicedocument' COL_IRI = 'upload' STATE_IRI = 'state_iri' PRIVATE_GET_RAW_CONTENT = 'private-download' PRIVATE_CHECK_DEPOSIT = 'check-deposit' PRIVATE_PUT_DEPOSIT = 'private-update' PRIVATE_GET_DEPOSIT_METADATA = 'private-read' PRIVATE_LIST_DEPOSITS = 'private-deposit-list' ARCHIVE_KEY = 'archive' METADATA_KEY = 'metadata' RAW_METADATA_KEY = 'raw-metadata' ARCHIVE_TYPE = 'archive' METADATA_TYPE = 'metadata' AUTHORIZED_PLATFORMS = ['development', 'production', 'testing'] DEPOSIT_STATUS_REJECTED = 'rejected' DEPOSIT_STATUS_PARTIAL = 'partial' DEPOSIT_STATUS_DEPOSITED = 'deposited' DEPOSIT_STATUS_VERIFIED = 'verified' DEPOSIT_STATUS_LOAD_SUCCESS = 'done' DEPOSIT_STATUS_LOAD_FAILURE = 'failed' # Revision author for deposit SWH_PERSON = { 'name': 'Software Heritage', 'fullname': 'Software Heritage', 'email': 'robot@softwareheritage.org' } def setup_django_for(platform=None, config_file=None): """Setup function for command line tools (swh.deposit.create_user) to initialize the needed db access. Note: Do not import any django related module prior to this function call. Otherwise, this will raise an django.core.exceptions.ImproperlyConfigured error message. Args: platform (str): the platform the scheduling is running config_file (str): Extra configuration file (typically for the production platform) Raises: ValueError in case of wrong platform inputs. 
""" if platform is not None: if platform not in AUTHORIZED_PLATFORMS: raise ValueError('Platform should be one of %s' % AUTHORIZED_PLATFORMS) if 'DJANGO_SETTINGS_MODULE' not in os.environ: os.environ['DJANGO_SETTINGS_MODULE'] = ( 'swh.deposit.settings.%s' % platform) if config_file: os.environ.setdefault('SWH_CONFIG_FILENAME', config_file) import django django.setup() class SWHDefaultConfig(SWHConfig): """Mixin intended to enrich views with SWH configuration. """ CONFIG_BASE_FILENAME = 'deposit/server' DEFAULT_CONFIG = { 'max_upload_size': ('int', 209715200), 'checks': ('bool', True), 'scheduler': ('dict', { 'cls': 'remote', 'args': { 'url': 'http://localhost:5008/' } }) } - ADDITIONAL_CONFIG = {} + ADDITIONAL_CONFIG = {} # type: Dict[str, Tuple[str, Any]] def __init__(self, **config): super().__init__() self.config = self.parse_config_file( additional_configs=[self.ADDITIONAL_CONFIG]) self.config.update(config) self.log = logging.getLogger('swh.deposit') if self.config['checks']: self.scheduler = get_scheduler(**self.config['scheduler']) diff --git a/swh/deposit/migrations/0001_initial.py b/swh/deposit/migrations/0001_initial.py index 9e4a59c0..1d49e299 100644 --- a/swh/deposit/migrations/0001_initial.py +++ b/swh/deposit/migrations/0001_initial.py @@ -1,109 +1,138 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2017-09-24 10:03 from __future__ import unicode_literals from django.conf import settings import django.contrib.auth.models import django.contrib.postgres.fields import django.contrib.postgres.fields.jsonb from django.db import migrations, models import django.db.models.deletion import django.utils.timezone class Migration(migrations.Migration): initial = True dependencies = [ ('auth', '0008_alter_user_username_max_length'), ] operations = [ migrations.CreateModel( name='Dbversion', fields=[ - ('version', models.IntegerField(primary_key=True, serialize=False)), - ('release', models.DateTimeField(default=django.utils.timezone.now, null=True)), + 
('version', models.IntegerField( + primary_key=True, serialize=False)), + ('release', models.DateTimeField( + default=django.utils.timezone.now, null=True)), ('description', models.TextField(blank=True, null=True)), ], options={ 'db_table': 'dbversion', }, ), migrations.CreateModel( name='Deposit', fields=[ ('id', models.BigAutoField(primary_key=True, serialize=False)), ('reception_date', models.DateTimeField(auto_now_add=True)), ('complete_date', models.DateTimeField(null=True)), ('external_id', models.TextField()), ('swh_id', models.TextField(blank=True, null=True)), - ('status', models.TextField(choices=[('partial', 'partial'), ('expired', 'expired'), ('ready', 'ready'), ('injecting', 'injecting'), ('success', 'success'), ('failure', 'failure')], default='partial')), + ('status', models.TextField( + choices=[('partial', 'partial'), + ('expired', 'expired'), + ('ready', 'ready'), + ('injecting', 'injecting'), + ('success', 'success'), + ('failure', 'failure')], + default='partial')), ], options={ 'db_table': 'deposit', }, ), migrations.CreateModel( name='DepositClient', fields=[ - ('user_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to=settings.AUTH_USER_MODEL)), - ('collections', django.contrib.postgres.fields.ArrayField(base_field=models.IntegerField(), null=True, size=None)), + ('user_ptr', models.OneToOneField( + auto_created=True, + on_delete=django.db.models.deletion.CASCADE, + parent_link=True, + primary_key=True, + serialize=False, + to=settings.AUTH_USER_MODEL)), + ('collections', + django.contrib.postgres.fields.ArrayField( + base_field=models.IntegerField(), + null=True, + size=None)), ], options={ 'db_table': 'deposit_client', }, bases=('auth.user',), managers=[ ('objects', django.contrib.auth.models.UserManager()), ], ), migrations.CreateModel( name='DepositCollection', fields=[ ('id', models.BigAutoField(primary_key=True, serialize=False)), ('name', 
models.TextField()), ], options={ 'db_table': 'deposit_collection', }, ), migrations.CreateModel( name='DepositRequest', fields=[ ('id', models.BigAutoField(primary_key=True, serialize=False)), ('date', models.DateTimeField(auto_now_add=True)), - ('metadata', django.contrib.postgres.fields.jsonb.JSONField(null=True)), - ('deposit', models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, to='deposit.Deposit')), + ('metadata', + django.contrib.postgres.fields.jsonb.JSONField(null=True)), + ('deposit', + models.ForeignKey( + on_delete=django.db.models.deletion.DO_NOTHING, + to='deposit.Deposit')), ], options={ 'db_table': 'deposit_request', }, ), migrations.CreateModel( name='DepositRequestType', fields=[ ('id', models.BigAutoField(primary_key=True, serialize=False)), ('name', models.TextField()), ], options={ 'db_table': 'deposit_request_type', }, ), migrations.AddField( model_name='depositrequest', name='type', - field=models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, to='deposit.DepositRequestType'), + field=models.ForeignKey( + on_delete=django.db.models.deletion.DO_NOTHING, + to='deposit.DepositRequestType'), ), migrations.AddField( model_name='deposit', name='client', - field=models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, to='deposit.DepositClient'), + field=models.ForeignKey( + on_delete=django.db.models.deletion.DO_NOTHING, + to='deposit.DepositClient'), ), migrations.AddField( model_name='deposit', name='collection', - field=models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, to='deposit.DepositCollection'), + field=models.ForeignKey( + on_delete=django.db.models.deletion.DO_NOTHING, + to='deposit.DepositCollection'), ), ] diff --git a/swh/deposit/migrations/0002_depositrequest_archive.py b/swh/deposit/migrations/0002_depositrequest_archive.py index d99eb60d..ad78ba4f 100644 --- a/swh/deposit/migrations/0002_depositrequest_archive.py +++ b/swh/deposit/migrations/0002_depositrequest_archive.py @@ -1,21 
+1,23 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2017-10-05 10:36 from __future__ import unicode_literals from django.db import migrations, models import swh.deposit.models class Migration(migrations.Migration): dependencies = [ ('deposit', '0001_initial'), ] operations = [ migrations.AddField( model_name='depositrequest', name='archive', - field=models.FileField(null=True, upload_to=swh.deposit.models.client_directory_path), + field=models.FileField( + null=True, + upload_to=swh.deposit.models.client_directory_path), ), ] diff --git a/swh/deposit/migrations/0005_auto_20171019_1436.py b/swh/deposit/migrations/0005_auto_20171019_1436.py index 17195ad2..75e9901a 100644 --- a/swh/deposit/migrations/0005_auto_20171019_1436.py +++ b/swh/deposit/migrations/0005_auto_20171019_1436.py @@ -1,20 +1,29 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2017-10-19 14:36 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ ('deposit', '0004_delete_temporaryarchive'), ] operations = [ migrations.AlterField( model_name='deposit', name='status', - field=models.TextField(choices=[('partial', 'partial'), ('expired', 'expired'), ('ready-for-checks', 'ready-for-checks'), ('ready', 'ready'), ('rejected', 'rejected'), ('injecting', 'injecting'), ('success', 'success'), ('failure', 'failure')], default='partial'), + field=models.TextField( + choices=[('partial', 'partial'), + ('expired', 'expired'), + ('ready-for-checks', 'ready-for-checks'), + ('ready', 'ready'), + ('rejected', 'rejected'), + ('injecting', 'injecting'), + ('success', 'success'), + ('failure', 'failure')], + default='partial'), ), ] diff --git a/swh/deposit/migrations/0006_depositclient_url.py b/swh/deposit/migrations/0006_depositclient_url.py index df96866f..dedb4155 100644 --- a/swh/deposit/migrations/0006_depositclient_url.py +++ b/swh/deposit/migrations/0006_depositclient_url.py @@ -1,21 +1,22 @@ # -*- coding: 
utf-8 -*- # Generated by Django 1.10.7 on 2017-11-07 13:12 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ ('deposit', '0005_auto_20171019_1436'), ] operations = [ migrations.AddField( model_name='depositclient', name='url', - field=models.TextField(default='https://hal.archives-ouvertes.fr/'), + field=models.TextField( + default='https://hal.archives-ouvertes.fr/'), preserve_default=False, ), ] diff --git a/swh/deposit/migrations/0008_auto_20171130_1513.py b/swh/deposit/migrations/0008_auto_20171130_1513.py index 20e5afba..e8da74c2 100644 --- a/swh/deposit/migrations/0008_auto_20171130_1513.py +++ b/swh/deposit/migrations/0008_auto_20171130_1513.py @@ -1,20 +1,29 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2017-11-30 15:13 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ ('deposit', '0007_auto_20171129_1609'), ] operations = [ migrations.AlterField( model_name='deposit', name='status', - field=models.TextField(choices=[('partial', 'partial'), ('expired', 'expired'), ('ready-for-checks', 'ready-for-checks'), ('ready-for-load', 'ready-for-load'), ('rejected', 'rejected'), ('loading', 'loading'), ('success', 'success'), ('failure', 'failure')], default='partial'), + field=models.TextField( + choices=[('partial', 'partial'), + ('expired', 'expired'), + ('ready-for-checks', 'ready-for-checks'), + ('ready-for-load', 'ready-for-load'), + ('rejected', 'rejected'), + ('loading', 'loading'), + ('success', 'success'), + ('failure', 'failure')], + default='partial'), ), ] diff --git a/swh/deposit/migrations/0009_deposit_parent.py b/swh/deposit/migrations/0009_deposit_parent.py index 9cab9fe9..e2fb56a1 100644 --- a/swh/deposit/migrations/0009_deposit_parent.py +++ b/swh/deposit/migrations/0009_deposit_parent.py @@ -1,21 +1,24 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 
2017-12-04 12:49 from __future__ import unicode_literals from django.db import migrations, models import django.db.models.deletion class Migration(migrations.Migration): dependencies = [ ('deposit', '0008_auto_20171130_1513'), ] operations = [ migrations.AddField( model_name='deposit', name='parent', - field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='deposit.Deposit'), + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + to='deposit.Deposit'), ), ] diff --git a/swh/deposit/migrations/0011_auto_20180115_1510.py b/swh/deposit/migrations/0011_auto_20180115_1510.py index 1265ba2e..3c1645cb 100644 --- a/swh/deposit/migrations/0011_auto_20180115_1510.py +++ b/swh/deposit/migrations/0011_auto_20180115_1510.py @@ -1,20 +1,29 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2018-01-15 15:10 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ ('deposit', '0010_auto_20180110_0953'), ] operations = [ migrations.AlterField( model_name='deposit', name='status', - field=models.TextField(choices=[('partial', 'partial'), ('expired', 'expired'), ('deposited', 'deposited'), ('verified', 'verified'), ('rejected', 'rejected'), ('loading', 'loading'), ('done', 'done'), ('failed', 'failed')], default='partial'), + field=models.TextField( + choices=[('partial', 'partial'), + ('expired', 'expired'), + ('deposited', 'deposited'), + ('verified', 'verified'), + ('rejected', 'rejected'), + ('loading', 'loading'), + ('done', 'done'), + ('failed', 'failed')], + default='partial'), ), ] diff --git a/swh/deposit/migrations/0015_depositrequest_typemigration.py b/swh/deposit/migrations/0015_depositrequest_typemigration.py index 046c84dd..61942737 100644 --- a/swh/deposit/migrations/0015_depositrequest_typemigration.py +++ b/swh/deposit/migrations/0015_depositrequest_typemigration.py @@ -1,43 +1,46 @@ # -*- coding: utf-8 -*- # Generated 
by Django 1.11.18 on 2019-04-12 16:40 from __future__ import unicode_literals from django.db import migrations, models def populate_deposit_type2(apps, schema_editor): # We can't import the DepositRequest model directly as it may be a newer # version than this migration expects. We use the historical version. DepositRequest = apps.get_model('deposit', 'DepositRequest') for deposit in DepositRequest.objects.all(): deposit.type2 = deposit.type.name deposit.save() class Migration(migrations.Migration): dependencies = [ ('deposit', '0014_auto_20180720_1221'), ] operations = [ migrations.AddField( model_name='depositrequest', name='type2', - field=models.CharField(choices=[('archive', 'archive'), ('metadata', 'metadata')], max_length=8, null=True), + field=models.CharField( + choices=[('archive', 'archive'), + ('metadata', 'metadata')], + max_length=8, null=True), ), migrations.RunPython(populate_deposit_type2), migrations.RemoveField( model_name='depositrequest', name='type', ), migrations.RenameField( model_name='depositrequest', old_name='type2', new_name='type', ), migrations.DeleteModel( name='DepositRequestType', ), ] diff --git a/swh/deposit/migrations/0016_auto_20190507_1408.py b/swh/deposit/migrations/0016_auto_20190507_1408.py index 40c50d4c..15c578ef 100644 --- a/swh/deposit/migrations/0016_auto_20190507_1408.py +++ b/swh/deposit/migrations/0016_auto_20190507_1408.py @@ -1,25 +1,31 @@ # -*- coding: utf-8 -*- # Generated by Django 1.11.18 on 2019-05-07 14:08 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ ('deposit', '0015_depositrequest_typemigration'), ] operations = [ migrations.AddField( model_name='deposit', name='check_task_id', - field=models.TextField(blank=True, null=True, verbose_name="Scheduler's associated checking task id"), + field=models.TextField( + blank=True, + null=True, + verbose_name="Scheduler's associated checking task id"), ), migrations.AddField( 
model_name='deposit', name='load_task_id', - field=models.TextField(blank=True, null=True, verbose_name="Scheduler's associated loading task id"), + field=models.TextField( + blank=True, + null=True, + verbose_name="Scheduler's associated loading task id"), ), ] diff --git a/swh/deposit/migrations/0017_auto_20190925_0906.py b/swh/deposit/migrations/0017_auto_20190925_0906.py index 15235bf8..739cbc78 100644 --- a/swh/deposit/migrations/0017_auto_20190925_0906.py +++ b/swh/deposit/migrations/0017_auto_20190925_0906.py @@ -1,21 +1,24 @@ # -*- coding: utf-8 -*- # Generated by Django 1.11.23 on 2019-09-25 09:06 from __future__ import unicode_literals from django.db import migrations, models import django.db.models.deletion class Migration(migrations.Migration): dependencies = [ ('deposit', '0016_auto_20190507_1408'), ] operations = [ migrations.AlterField( model_name='deposit', name='parent', - field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.PROTECT, to='deposit.Deposit'), + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.PROTECT, + to='deposit.Deposit'), ), ] diff --git a/swh/deposit/models.py b/swh/deposit/models.py index 3ff3bdd0..e7c5440d 100644 --- a/swh/deposit/models.py +++ b/swh/deposit/models.py @@ -1,220 +1,223 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # Generated from: # cd swh_deposit && \ # python3 -m manage inspectdb from django.contrib.postgres.fields import JSONField, ArrayField from django.contrib.auth.models import User, UserManager from django.db import models from django.utils.timezone import now from .config import ( DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_REJECTED, ARCHIVE_TYPE, 
METADATA_TYPE ) class Dbversion(models.Model): """Db version """ version = models.IntegerField(primary_key=True) release = models.DateTimeField(default=now, null=True) description = models.TextField(blank=True, null=True) class Meta: db_table = 'dbversion' def __str__(self): return str({ 'version': self.version, 'release': self.release, 'description': self.description }) """Possible status""" DEPOSIT_STATUS = [ (DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_PARTIAL), ('expired', 'expired'), (DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_DEPOSITED), (DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_VERIFIED), (DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_REJECTED), ('loading', 'loading'), (DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_SUCCESS), (DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_LOAD_FAILURE), ] """Possible status and the detailed meaning.""" DEPOSIT_STATUS_DETAIL = { DEPOSIT_STATUS_PARTIAL: 'Deposit is partially received. To finalize it, ' 'In-Progress header should be false', 'expired': 'Deposit has been there too long and is now ' 'deemed ready to be garbage collected', DEPOSIT_STATUS_DEPOSITED: 'Deposit is ready for additional checks ' '(tarball ok, metadata, etc...)', DEPOSIT_STATUS_VERIFIED: 'Deposit is fully received, checked, and ' 'ready for loading', DEPOSIT_STATUS_REJECTED: 'Deposit failed the checks', 'loading': "Loading is ongoing on swh's side", DEPOSIT_STATUS_LOAD_SUCCESS: 'The deposit has been successfully ' 'loaded into the Software Heritage archive', DEPOSIT_STATUS_LOAD_FAILURE: 'The deposit loading into the ' 'Software Heritage archive failed', } class DepositClient(User): """Deposit client """ collections = ArrayField(models.IntegerField(), null=True) - objects = UserManager() + objects = UserManager() # type: ignore + # this typing hint is due to a mypy/django-stubs limitation, + # see https://github.com/typeddjango/django-stubs/issues/174 + provider_url = models.TextField(null=False) domain = models.TextField(null=False) class Meta: db_table = 
'deposit_client' def __str__(self): return str({ 'id': self.id, 'collections': self.collections, 'username': super().username, 'domain': self.domain, 'provider_url': self.provider_url, }) class Deposit(models.Model): """Deposit reception table """ id = models.BigAutoField(primary_key=True) # First deposit reception date reception_date = models.DateTimeField(auto_now_add=True) # Date when the deposit is deemed complete and ready for loading complete_date = models.DateTimeField(null=True) # collection concerned by the deposit collection = models.ForeignKey( 'DepositCollection', models.DO_NOTHING) # Deposit's external identifier external_id = models.TextField() # Deposit client client = models.ForeignKey('DepositClient', models.DO_NOTHING) # SWH's loading result identifier swh_id = models.TextField(blank=True, null=True) swh_id_context = models.TextField(blank=True, null=True) swh_anchor_id = models.TextField(blank=True, null=True) swh_anchor_id_context = models.TextField(blank=True, null=True) # Deposit's status regarding loading status = models.TextField( choices=DEPOSIT_STATUS, default=DEPOSIT_STATUS_PARTIAL) status_detail = JSONField(null=True) # deposit can have one parent parent = models.ForeignKey('self', on_delete=models.PROTECT, null=True) check_task_id = models.TextField( blank=True, null=True, verbose_name="Scheduler's associated checking task id" ) load_task_id = models.TextField( blank=True, null=True, verbose_name="Scheduler's associated loading task id" ) class Meta: db_table = 'deposit' def __str__(self): d = { 'id': self.id, 'reception_date': self.reception_date, 'collection': self.collection.name, 'external_id': self.external_id, 'client': self.client.username, 'status': self.status, } if self.status in (DEPOSIT_STATUS_REJECTED): d['status_detail'] = self.status_detail return str(d) def client_directory_path(instance, filename): """Callable to upload archive in MEDIA_ROOT/user_/ Args: instance (DepositRequest): DepositRequest concerned by the upload 
filename (str): Filename of the uploaded file Returns: A path to be prefixed by the MEDIA_ROOT to access physically to the file uploaded. """ return 'client_{0}/{1}'.format(instance.deposit.client.id, filename) REQUEST_TYPES = [(ARCHIVE_TYPE, ARCHIVE_TYPE), (METADATA_TYPE, METADATA_TYPE)] class DepositRequest(models.Model): """Deposit request associated to one deposit. """ id = models.BigAutoField(primary_key=True) # Deposit concerned by the request deposit = models.ForeignKey(Deposit, models.DO_NOTHING) date = models.DateTimeField(auto_now_add=True) # Deposit request information on the data to inject # this can be null when type is 'archive' metadata = JSONField(null=True) raw_metadata = models.TextField(null=True) # this can be null when type is 'metadata' archive = models.FileField(null=True, upload_to=client_directory_path) type = models.CharField(max_length=8, choices=REQUEST_TYPES, null=True) class Meta: db_table = 'deposit_request' def __str__(self): meta = None if self.metadata: from json import dumps meta = dumps(self.metadata) archive_name = None if self.archive: archive_name = self.archive.name return str({ 'id': self.id, 'deposit': self.deposit, 'metadata': meta, 'archive': archive_name }) class DepositCollection(models.Model): id = models.BigAutoField(primary_key=True) # Human readable name for the collection type e.g HAL, arXiv, etc... name = models.TextField() class Meta: db_table = 'deposit_collection' def __str__(self): return str({'id': self.id, 'name': self.name}) diff --git a/swh/deposit/py.typed b/swh/deposit/py.typed new file mode 100644 index 00000000..1242d432 --- /dev/null +++ b/swh/deposit/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561. 
diff --git a/swh/deposit/tests/__init__.py b/swh/deposit/tests/__init__.py index af3302c5..f8ec3db7 100644 --- a/swh/deposit/tests/__init__.py +++ b/swh/deposit/tests/__init__.py @@ -1,73 +1,73 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.deposit.config import setup_django_for from swh.deposit.config import SWHDefaultConfig # noqa from swh.loader.core.loader import BufferedLoader TEST_CONFIG = { 'max_upload_size': 500, 'extraction_dir': '/tmp/swh-deposit/test/extraction-dir', 'checks': False, 'provider': { 'provider_name': '', 'provider_type': 'deposit_client', 'provider_url': '', 'metadata': { } }, 'tool': { 'name': 'swh-deposit', 'version': '0.0.1', 'configuration': { 'sword_version': '2' } } } def parse_deposit_config_file(base_filename=None, config_filename=None, additional_configs=None, global_config=True): return TEST_CONFIG TEST_LOADER_CONFIG = { 'extraction_dir': '/tmp/swh-loader-tar/test/', 'working_dir': '/tmp/swh-loader-tar/test/working-dir', 'debug': False, 'storage': { 'cls': 'memory', 'args': { } }, 'send_contents': True, 'send_directories': True, 'send_revisions': True, 'send_releases': True, 'send_snapshot': True, 'content_size_limit': 100 * 1024 * 1024, 'content_packet_size': 10, 'content_packet_size_bytes': 100 * 1024 * 1024, 'directory_packet_size': 10, 'revision_packet_size': 10, 'release_packet_size': 10, } def parse_loader_config_file(base_filename=None, config_filename=None, additional_configs=None, global_config=True): return TEST_LOADER_CONFIG # monkey patch classes method permits to override, for tests purposes, # the default configuration without side-effect, i.e do not load the # configuration from disk -SWHDefaultConfig.parse_config_file = parse_deposit_config_file -BufferedLoader.parse_config_file = parse_loader_config_file 
+SWHDefaultConfig.parse_config_file = parse_deposit_config_file # type: ignore +BufferedLoader.parse_config_file = parse_loader_config_file # type: ignore setup_django_for('testing') diff --git a/swh/deposit/tests/api/test_deposit_delete.py b/swh/deposit/tests/api/test_deposit_delete.py index ff6e066c..27cec1b3 100644 --- a/swh/deposit/tests/api/test_deposit_delete.py +++ b/swh/deposit/tests/api/test_deposit_delete.py @@ -1,121 +1,121 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import defaultdict from django.urls import reverse from rest_framework import status -from typing import Mapping +from typing import Dict, Mapping from swh.deposit.config import ( EDIT_SE_IRI, EM_IRI, ARCHIVE_KEY, METADATA_KEY, DEPOSIT_STATUS_DEPOSITED ) from swh.deposit.models import Deposit, DepositRequest def count_deposit_request_types(deposit_requests) -> Mapping[str, int]: - deposit_request_types = defaultdict(int) + deposit_request_types = defaultdict(int) # type: Dict[str, int] for dr in deposit_requests: deposit_request_types[dr.type] += 1 return deposit_request_types def test_delete_archive_on_partial_deposit_works( authenticated_client, partial_deposit_with_metadata, deposit_collection): """Removing partial deposit's archive should return a 204 response """ deposit_id = partial_deposit_with_metadata.id deposit = Deposit.objects.get(pk=deposit_id) deposit_requests = DepositRequest.objects.filter(deposit=deposit) # deposit request type: 'archive', 1 'metadata' deposit_request_types = count_deposit_request_types(deposit_requests) assert deposit_request_types == { ARCHIVE_KEY: 1, METADATA_KEY: 1 } # when update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit_id]) response = authenticated_client.delete(update_uri) # then assert response.status_code == 
status.HTTP_204_NO_CONTENT deposit = Deposit.objects.get(pk=deposit_id) deposit_requests2 = DepositRequest.objects.filter(deposit=deposit) deposit_request_types = count_deposit_request_types(deposit_requests2) assert deposit_request_types == { METADATA_KEY: 1 } def test_delete_archive_on_undefined_deposit_fails( authenticated_client, deposit_collection, sample_archive): """Delete undefined deposit returns a 404 response """ # when update_uri = reverse(EM_IRI, args=[deposit_collection.name, 999]) response = authenticated_client.delete(update_uri) # then assert response.status_code == status.HTTP_404_NOT_FOUND def test_delete_non_partial_deposit( authenticated_client, deposit_collection, deposited_deposit): """Delete !partial status deposit should return a 400 response """ deposit = deposited_deposit assert deposit.status == DEPOSIT_STATUS_DEPOSITED # when update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.delete(update_uri) # then assert response.status_code == status.HTTP_400_BAD_REQUEST deposit = Deposit.objects.get(pk=deposit.id) assert deposit is not None def test_delete_partial_deposit( authenticated_client, deposit_collection, partial_deposit): """Delete deposit should return a 204 response """ # given deposit = partial_deposit # when url = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.delete(url) # then assert response.status_code == status.HTTP_204_NO_CONTENT deposit_requests = list(DepositRequest.objects.filter(deposit=deposit)) assert deposit_requests == [] deposits = list(Deposit.objects.filter(pk=deposit.id)) assert deposits == [] def test_delete_on_edit_se_iri_cannot_delete_non_partial_deposit( authenticated_client, deposit_collection, complete_deposit): """Delete !partial deposit should return a 400 response """ # given deposit = complete_deposit # when url = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) response = 
authenticated_client.delete(url) # then assert response.status_code == status.HTTP_400_BAD_REQUEST deposit = Deposit.objects.get(pk=deposit.id) assert deposit is not None diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py index 89d316a0..725bc20a 100644 --- a/swh/deposit/tests/conftest.py +++ b/swh/deposit/tests/conftest.py @@ -1,316 +1,316 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import base64 import pytest import psycopg2 from django.urls import reverse from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT from rest_framework import status from rest_framework.test import APIClient from typing import Mapping from swh.scheduler.tests.conftest import * # noqa from swh.deposit.parsers import parse_xml from swh.deposit.config import ( COL_IRI, EDIT_SE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_LOAD_FAILURE ) from swh.deposit.tests.common import create_arborescence_archive TEST_USER = { 'username': 'test', 'password': 'password', 'email': 'test@example.org', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'domain': 'archives-ouvertes.fr/', 'collection': { 'name': 'test' }, } def execute_sql(sql): """Execute sql to postgres db""" with psycopg2.connect(database='postgres') as conn: conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) cur = conn.cursor() cur.execute(sql) @pytest.hookimpl(tryfirst=True) def pytest_load_initial_conftests(early_config, parser, args): """This hook is done prior to django loading. Used to initialize the deposit's server db. 
""" - import project.app.signals + import project.app.signals # type: ignore def prepare_db(*args, **kwargs): from django.conf import settings db_name = 'tests' # work around db settings for django for k, v in [ ('ENGINE', 'django.db.backends.postgresql'), ('NAME', 'tests'), ('USER', postgresql_proc.user), # noqa ('HOST', postgresql_proc.host), # noqa ('PORT', postgresql_proc.port), # noqa ]: settings.DATABASES['default'][k] = v execute_sql('DROP DATABASE IF EXISTS %s' % db_name) execute_sql('CREATE DATABASE %s TEMPLATE template0' % db_name) project.app.signals.something = prepare_db @pytest.fixture(autouse=True, scope='session') def swh_proxy(): """Automatically inject this fixture in all tests to ensure no outside connection takes place. """ os.environ['http_proxy'] = 'http://localhost:999' os.environ['https_proxy'] = 'http://localhost:999' def create_deposit_collection(collection_name: str): """Create a deposit collection with name collection_name """ from swh.deposit.models import DepositCollection try: collection = DepositCollection._default_manager.get( name=collection_name) except DepositCollection.DoesNotExist: collection = DepositCollection(name=collection_name) collection.save() return collection def deposit_collection_factory( collection_name=TEST_USER['collection']['name']): @pytest.fixture def _deposit_collection(db, collection_name=collection_name): return create_deposit_collection(collection_name) return _deposit_collection deposit_collection = deposit_collection_factory() deposit_another_collection = deposit_collection_factory('another-collection') @pytest.fixture def deposit_user(db, deposit_collection): """Create/Return the test_user "test" """ from swh.deposit.models import DepositClient try: user = DepositClient._default_manager.get( username=TEST_USER['username']) except DepositClient.DoesNotExist: user = DepositClient._default_manager.create_user( username=TEST_USER['username'], email=TEST_USER['email'], password=TEST_USER['password'], 
provider_url=TEST_USER['provider_url'], domain=TEST_USER['domain'], ) user.collections = [deposit_collection.id] user.save() return user @pytest.fixture def client(): """Override pytest-django one which does not work for djangorestframework. """ return APIClient() # <- drf's client @pytest.yield_fixture def authenticated_client(client, deposit_user): """Returned a logged client """ _token = '%s:%s' % (deposit_user.username, TEST_USER['password']) token = base64.b64encode(_token.encode('utf-8')) authorization = 'Basic %s' % token.decode('utf-8') client.credentials(HTTP_AUTHORIZATION=authorization) yield client client.logout() @pytest.fixture def sample_archive(tmp_path): """Returns a sample archive """ tmp_path = str(tmp_path) # pytest version limitation in previous version archive = create_arborescence_archive( tmp_path, 'archive1', 'file1', b'some content in file') return archive def create_deposit( authenticated_client, collection_name: str, sample_archive, external_id: str, deposit_status=DEPOSIT_STATUS_DEPOSITED): """Create a skeleton shell deposit """ url = reverse(COL_IRI, args=[collection_name]) # when response = authenticated_client.post( url, content_type='application/zip', # as zip data=sample_archive['data'], # + headers CONTENT_LENGTH=sample_archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=sample_archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( sample_archive['name'])) # then assert response.status_code == status.HTTP_201_CREATED from swh.deposit.models import Deposit deposit = Deposit._default_manager.get(external_id=external_id) if deposit.status != deposit_status: deposit.status = deposit_status deposit.save() assert deposit.status == deposit_status return deposit def create_binary_deposit( authenticated_client, collection_name: str, sample_archive, external_id: str, deposit_status: str = DEPOSIT_STATUS_DEPOSITED, atom_dataset: 
Mapping[str, bytes] = {}): """Create a deposit with both metadata and archive set. Then alters its status to `deposit_status`. """ deposit = create_deposit( authenticated_client, collection_name, sample_archive, external_id=external_id, deposit_status=DEPOSIT_STATUS_PARTIAL) response = authenticated_client.post( reverse(EDIT_SE_IRI, args=[collection_name, deposit.id]), content_type='application/atom+xml;type=entry', data=atom_dataset['entry-data0'] % deposit.external_id.encode('utf-8'), HTTP_SLUG=deposit.external_id, HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED assert deposit.status == DEPOSIT_STATUS_PARTIAL from swh.deposit.models import Deposit deposit = Deposit._default_manager.get(pk=deposit.id) if deposit.status != deposit_status: deposit.status = deposit_status deposit.save() assert deposit.status == deposit_status return deposit def deposit_factory(deposit_status=DEPOSIT_STATUS_DEPOSITED): """Build deposit with a specific status """ @pytest.fixture() def _deposit(sample_archive, deposit_collection, authenticated_client, deposit_status=deposit_status): external_id = 'external-id-%s' % deposit_status return create_deposit( authenticated_client, deposit_collection.name, sample_archive, external_id=external_id, deposit_status=deposit_status ) return _deposit deposited_deposit = deposit_factory() rejected_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_REJECTED) partial_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_PARTIAL) verified_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_VERIFIED) completed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS) failed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_FAILURE) @pytest.fixture def partial_deposit_with_metadata( sample_archive, deposit_collection, authenticated_client, atom_dataset): """Returns deposit with archive and metadata provided, status 'partial' """ return create_binary_deposit( authenticated_client, 
deposit_collection.name, sample_archive, external_id='external-id-partial', deposit_status=DEPOSIT_STATUS_PARTIAL, atom_dataset=atom_dataset ) @pytest.fixture def partial_deposit_only_metadata( deposit_collection, authenticated_client, atom_dataset): response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), content_type='application/atom+xml;type=entry', data=atom_dataset['entry-data1'], HTTP_SLUG='external-id-partial', HTTP_IN_PROGRESS=True) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(response.content) deposit_id = response_content['deposit_id'] from swh.deposit.models import Deposit deposit = Deposit._default_manager.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_PARTIAL return deposit @pytest.fixture def complete_deposit(sample_archive, deposit_collection, authenticated_client): """Returns a completed deposit (load success) """ deposit = create_deposit( authenticated_client, deposit_collection.name, sample_archive, external_id='external-id-complete', deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS ) _swh_id_context = 'https://hal.archives-ouvertes.fr/hal-01727745' deposit.swh_id = 'swh:1:dir:42a13fc721c8716ff695d0d62fc851d641f3a12b' deposit.swh_id_context = '%s;%s' % ( deposit.swh_id, _swh_id_context) deposit.swh_anchor_id = \ 'swh:rev:1:548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10' deposit.swh_anchor_id_context = '%s;%s' % ( deposit.swh_anchor_id, _swh_id_context) deposit.save() return deposit @pytest.fixture() def tmp_path(tmp_path): return str(tmp_path) # issue with oldstable's pytest version diff --git a/swh/deposit/tests/loader/conftest.py b/swh/deposit/tests/loader/conftest.py index 98e4223e..cd5081f0 100644 --- a/swh/deposit/tests/loader/conftest.py +++ b/swh/deposit/tests/loader/conftest.py @@ -1,67 +1,67 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or 
any later version # See top-level LICENSE file for more information import re import os import pytest import yaml from functools import partial from swh.core.pytest_plugin import get_response_cb from swh.scheduler.tests.conftest import * # noqa from swh.storage.tests.conftest import * # noqa from swh.deposit.loader.checker import DepositChecker from swh.deposit.loader.loader import DepositLoader -@pytest.fixture(scope='session') +@pytest.fixture(scope='session') # type: ignore # expected redefinition def celery_includes(): return [ 'swh.deposit.loader.tasks', ] @pytest.fixture def swh_config(tmp_path, swh_storage_postgresql, monkeypatch): storage_config = { 'url': 'https://deposit.softwareheritage.org/', 'storage': { 'cls': 'local', 'args': { 'db': swh_storage_postgresql.dsn, 'objstorage': { 'cls': 'memory', 'args': {} }, }, }, } conffile = os.path.join(tmp_path, 'deposit.yml') with open(conffile, 'w') as f: f.write(yaml.dump(storage_config)) monkeypatch.setenv('SWH_CONFIG_FILENAME', conffile) return conffile @pytest.fixture def deposit_checker(swh_config): return DepositChecker() @pytest.fixture def deposit_loader(swh_config): return DepositLoader() @pytest.fixture def requests_mock_datadir(datadir, requests_mock_datadir): """Override default behavior to deal with put method """ cb = partial(get_response_cb, datadir=datadir) requests_mock_datadir.put(re.compile('https://'), body=cb) return requests_mock_datadir diff --git a/version.txt b/version.txt index 344ebe65..b1a6380d 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.75-0-geb60bf5 \ No newline at end of file +v0.0.76-0-gb646d12 \ No newline at end of file