diff --git a/PKG-INFO b/PKG-INFO index b505874f..a1cf90f3 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.deposit -Version: 0.0.52 +Version: 0.0.53 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/debian/control b/debian/control index fab46535..d9e6b216 100644 --- a/debian/control +++ b/debian/control @@ -1,56 +1,56 @@ Source: swh-deposit Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python (>= 2), python3-setuptools, python3-all, python3-nose, python3-django-nose, python3-vcversioner, python3-swh.core (>= 0.0.36~), - python3-swh.model (>= 0.0.21~), + python3-swh.model (>= 0.0.25~), python3-swh.loader.core (>= 0.0.32~), python3-swh.loader.tar (>= 0.0.35~), python3-swh.scheduler (>= 0.0.26~), python3-django, python3-click, python3-vcversioner, python3-djangorestframework, - python3-djangorestframework-xml, python3-requests, python3-lxml, + python3-xmltodict, patool Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/source/swh-deposit/ Package: python3-swh.deposit Architecture: all Depends: python3-swh.core (>= 0.0.36~), - python3-swh.model (>= 0.0.21~), + python3-swh.model (>= 0.0.25~), python3-swh.scheduler (>= 0.0.26~), patool, ${misc:Depends}, ${python3:Depends} Description: Software Heritage Deposit Server Package: python3-swh.deposit.client Architecture: all Depends: python3-swh.core (>= 0.0.36~), - python3-swh.model (>= 0.0.21~), + python3-swh.model (>= 0.0.25~), python3-requests, python3-lxml, ${misc:Depends}, ${python3:Depends} Description: Software Heritage Deposit Api Client Package: python3-swh.deposit.loader Conflict: python3-swh.deposit.injection Architecture: all Depends: python3-swh.deposit.client (= ${binary:Version}), python3-swh.core (>= 0.0.36~), - python3-swh.model (>= 0.0.21~), + python3-swh.model (>= 0.0.25~), python3-swh.loader.core (>= 0.0.32~), python3-swh.loader.tar (>= 0.0.35~), python3-swh.scheduler (>= 0.0.26~), ${misc:Depends}, ${python3:Depends} Description: Software Heritage Deposit Loader diff --git a/requirements-swh.txt b/requirements-swh.txt index 1f63fe7e..29ec1fa7 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,5 +1,5 @@ swh.core >= 0.0.36 swh.loader.tar >= 0.0.35 swh.loader.core >= 0.0.32 swh.scheduler >= 0.0.26 -swh.model >= 0.0.21 +swh.model >= 0.0.25 diff --git a/requirements.txt b/requirements.txt index 0d52e480..b3db84ea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ vcversioner click Django djangorestframework -djangorestframework-xml lxml +xmltodict diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO index b505874f..a1cf90f3 100644 --- a/swh.deposit.egg-info/PKG-INFO +++ b/swh.deposit.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.deposit -Version: 0.0.52 +Version: 0.0.53 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.deposit.egg-info/SOURCES.txt b/swh.deposit.egg-info/SOURCES.txt index fe5ecaff..f662065f 100644 --- a/swh.deposit.egg-info/SOURCES.txt +++ b/swh.deposit.egg-info/SOURCES.txt @@ -1,152 +1,153 @@ .gitignore AUTHORS LICENSE MANIFEST.in Makefile Makefile.local README.md requirements-swh.txt requirements.txt setup.py version.txt bin/Makefile bin/content.sh bin/create_deposit.sh bin/create_deposit_atom.sh bin/create_deposit_with_metadata.sh bin/default-setup bin/download-deposit-archive.sh bin/home.sh bin/replace-deposit-archive.sh bin/service-document.sh bin/status.sh bin/swh-deposit bin/update-deposit-with-another-archive.sh bin/update-status.sh debian/changelog debian/compat debian/control debian/copyright debian/rules debian/source/format docs/.gitignore docs/Makefile docs/blueprint.rst docs/conf.py docs/dev-info.rst docs/getting-started.rst docs/index.rst docs/metadata.rst docs/spec-api.rst docs/spec-loading.rst docs/sys-info.rst docs/_static/.placeholder docs/_templates/.placeholder docs/endpoints/collection.rst docs/endpoints/content.rst docs/endpoints/service-document.rst docs/endpoints/status.rst docs/endpoints/update-media.rst docs/endpoints/update-metadata.rst docs/images/deposit-create-chart.png docs/images/deposit-delete-chart.png docs/images/deposit-update-chart.png resources/deposit/server.yml swh/__init__.py swh/manage.py swh.deposit.egg-info/PKG-INFO swh.deposit.egg-info/SOURCES.txt swh.deposit.egg-info/dependency_links.txt swh.deposit.egg-info/requires.txt swh.deposit.egg-info/top_level.txt swh/deposit/__init__.py swh/deposit/apps.py swh/deposit/auth.py swh/deposit/config.py swh/deposit/create_user.py swh/deposit/errors.py swh/deposit/models.py swh/deposit/parsers.py swh/deposit/signals.py swh/deposit/urls.py swh/deposit/wsgi.py swh/deposit/api/__init__.py swh/deposit/api/common.py swh/deposit/api/deposit.py swh/deposit/api/deposit_content.py swh/deposit/api/deposit_status.py swh/deposit/api/deposit_update.py swh/deposit/api/service_document.py swh/deposit/api/urls.py swh/deposit/api/private/__init__.py swh/deposit/api/private/deposit_check.py swh/deposit/api/private/deposit_read.py swh/deposit/api/private/deposit_update_status.py swh/deposit/api/private/urls.py swh/deposit/client/__init__.py swh/deposit/client/cli.py swh/deposit/fixtures/__init__.py swh/deposit/fixtures/deposit_data.yaml swh/deposit/loader/__init__.py swh/deposit/loader/checker.py swh/deposit/loader/loader.py swh/deposit/loader/scheduler.py swh/deposit/loader/tasks.py swh/deposit/migrations/0001_initial.py swh/deposit/migrations/0002_depositrequest_archive.py swh/deposit/migrations/0003_temporaryarchive.py swh/deposit/migrations/0004_delete_temporaryarchive.py swh/deposit/migrations/0005_auto_20171019_1436.py swh/deposit/migrations/0006_depositclient_url.py swh/deposit/migrations/0007_auto_20171129_1609.py swh/deposit/migrations/0008_auto_20171130_1513.py swh/deposit/migrations/0009_deposit_parent.py swh/deposit/migrations/0010_auto_20180110_0953.py swh/deposit/migrations/0011_auto_20180115_1510.py swh/deposit/migrations/__init__.py swh/deposit/settings/__init__.py swh/deposit/settings/common.py swh/deposit/settings/development.py swh/deposit/settings/production.py swh/deposit/settings/testing.py swh/deposit/static/robots.txt swh/deposit/static/css/bootstrap-responsive.min.css swh/deposit/static/css/style.css swh/deposit/static/img/arrow-up-small.png swh/deposit/static/img/swh-logo-deposit.png swh/deposit/static/img/swh-logo-deposit.svg swh/deposit/static/img/icons/swh-logo-32x32.png swh/deposit/static/img/icons/swh-logo-deposit-180x180.png swh/deposit/static/img/icons/swh-logo-deposit-192x192.png swh/deposit/static/img/icons/swh-logo-deposit-270x270.png swh/deposit/templates/__init__.py swh/deposit/templates/homepage.html swh/deposit/templates/layout.html swh/deposit/templates/deposit/__init__.py swh/deposit/templates/deposit/content.xml swh/deposit/templates/deposit/deposit_receipt.xml swh/deposit/templates/deposit/error.xml swh/deposit/templates/deposit/service_document.xml swh/deposit/templates/deposit/status.xml swh/deposit/templates/rest_framework/api.html swh/deposit/tests/__init__.py swh/deposit/tests/common.py swh/deposit/tests/api/__init__.py swh/deposit/tests/api/test_common.py swh/deposit/tests/api/test_deposit.py swh/deposit/tests/api/test_deposit_atom.py swh/deposit/tests/api/test_deposit_binary.py swh/deposit/tests/api/test_deposit_check.py swh/deposit/tests/api/test_deposit_delete.py swh/deposit/tests/api/test_deposit_multipart.py swh/deposit/tests/api/test_deposit_read_archive.py swh/deposit/tests/api/test_deposit_read_metadata.py swh/deposit/tests/api/test_deposit_status.py swh/deposit/tests/api/test_deposit_update.py swh/deposit/tests/api/test_deposit_update_status.py +swh/deposit/tests/api/test_parser.py swh/deposit/tests/api/test_service_document.py swh/deposit/tests/loader/__init__.py swh/deposit/tests/loader/common.py swh/deposit/tests/loader/test_checker.py swh/deposit/tests/loader/test_client.py swh/deposit/tests/loader/test_loader.py \ No newline at end of file diff --git a/swh.deposit.egg-info/requires.txt b/swh.deposit.egg-info/requires.txt index 963df3d6..43355e1b 100644 --- a/swh.deposit.egg-info/requires.txt +++ b/swh.deposit.egg-info/requires.txt @@ -1,11 +1,11 @@ Django click djangorestframework -djangorestframework-xml lxml swh.core>=0.0.36 swh.loader.core>=0.0.32 swh.loader.tar>=0.0.35 -swh.model>=0.0.21 +swh.model>=0.0.25 swh.scheduler>=0.0.26 vcversioner +xmltodict diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py index 5706d610..e2574ea2 100644 --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,863 +1,861 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib from abc import ABCMeta, abstractmethod from django.core.urlresolvers import reverse from django.http import HttpResponse from django.shortcuts import render from django.utils import timezone from rest_framework import status from rest_framework.authentication import BasicAuthentication from rest_framework.permissions import IsAuthenticated, AllowAny from rest_framework.views import APIView from swh.model import hashutil from ..config import SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI from ..config import ARCHIVE_KEY, METADATA_KEY, STATE_IRI from ..config import DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL from ..config import DEPOSIT_STATUS_LOAD_SUCCESS from ..errors import MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT from ..errors import CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED from ..errors import make_error_response_from_dict, FORBIDDEN from ..errors import NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED from ..models import Deposit, DepositRequest, DepositCollection from ..models import DepositRequestType, DepositClient from ..parsers import parse_xml ACCEPT_PACKAGINGS = ['http://purl.org/net/sword/package/SimpleZip'] ACCEPT_ARCHIVE_CONTENT_TYPES = ['application/zip', 'application/x-tar'] class SWHAPIView(APIView): """Mixin intended as a based API view to enforce the basic authentication check """ authentication_classes = (BasicAuthentication, ) permission_classes = (IsAuthenticated, ) class SWHPrivateAPIView(SWHAPIView): """Mixin intended as private api (so no authentication) based API view (for the private ones). """ authentication_classes = () permission_classes = (AllowAny, ) class SWHBaseDeposit(SWHDefaultConfig, SWHAPIView, metaclass=ABCMeta): """Base deposit request class sharing multiple common behaviors. """ def __init__(self): super().__init__() deposit_request_types = DepositRequestType.objects.all() self.deposit_request_types = { type.name: type for type in deposit_request_types } def _read_headers(self, req): """Read and unify the necessary headers from the request (those are not stored in the same location or not properly formatted). Args: req (Request): Input request Returns: Dictionary with the following keys (some associated values may be None): - content-type - content-length - in-progress - content-disposition - packaging - slug - on-behalf-of """ meta = req._request.META content_type = req.content_type content_length = meta.get('CONTENT_LENGTH') if content_length and isinstance(content_length, str): content_length = int(content_length) # final deposit if not provided in_progress = meta.get('HTTP_IN_PROGRESS', False) content_disposition = meta.get('HTTP_CONTENT_DISPOSITION') if isinstance(in_progress, str): in_progress = in_progress.lower() == 'true' content_md5sum = meta.get('HTTP_CONTENT_MD5') if content_md5sum: content_md5sum = bytes.fromhex(content_md5sum) packaging = meta.get('HTTP_PACKAGING') slug = meta.get('HTTP_SLUG') on_behalf_of = meta.get('HTTP_ON_BEHALF_OF') metadata_relevant = meta.get('HTTP_METADATA_RELEVANT') return { 'content-type': content_type, 'content-length': content_length, 'in-progress': in_progress, 'content-disposition': content_disposition, 'content-md5sum': content_md5sum, 'packaging': packaging, 'slug': slug, 'on-behalf-of': on_behalf_of, 'metadata-relevant': metadata_relevant, } def _compute_md5(self, filehandler): """Compute uploaded file's md5 sum. Args: filehandler (InMemoryUploadedFile): the file to compute the md5 hash Returns: the md5 checksum (str) """ h = hashlib.md5() for chunk in filehandler: h.update(chunk) return h.digest() def _deposit_put(self, deposit_id=None, in_progress=False, external_id=None): """Save/Update a deposit in db. Args: deposit_id (int): deposit identifier in_progress (dict): The deposit's status external_id (str): The external identifier to associate to the deposit Returns: The Deposit instance saved or updated. """ if in_progress is False: complete_date = timezone.now() status_type = DEPOSIT_STATUS_DEPOSITED else: complete_date = None status_type = DEPOSIT_STATUS_PARTIAL if not deposit_id: try: # find a deposit parent (same external id, status load # to success) deposit_parent = Deposit.objects.filter( external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS).order_by('-id')[0:1].get() # noqa except Deposit.DoesNotExist: deposit_parent = None deposit = Deposit(collection=self._collection, external_id=external_id, complete_date=complete_date, status=status_type, client=self._client, parent=deposit_parent) else: deposit = Deposit.objects.get(pk=deposit_id) # update metadata deposit.complete_date = complete_date deposit.status = status_type deposit.save() return deposit def _deposit_request_put(self, deposit, deposit_request_data, replace_metadata=False, replace_archives=False): """Save a deposit request with metadata attached to a deposit. Args: deposit (Deposit): The deposit concerned by the request deposit_request_data (dict): The dictionary with at most 2 deposit request types (archive, metadata) to associate to the deposit replace_metadata (bool): Flag defining if we add or update existing metadata to the deposit replace_archives (bool): Flag defining if we add or update archives to existing deposit Returns: None """ if replace_metadata: DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types[METADATA_KEY]).delete() if replace_archives: DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types[ARCHIVE_KEY]).delete() deposit_request = None archive_file = deposit_request_data.get(ARCHIVE_KEY) if archive_file: deposit_request = DepositRequest( type=self.deposit_request_types[ARCHIVE_KEY], deposit=deposit, archive=archive_file) deposit_request.save() metadata = deposit_request_data.get(METADATA_KEY) if metadata: deposit_request = DepositRequest( type=self.deposit_request_types[METADATA_KEY], deposit=deposit, metadata=metadata) deposit_request.save() assert deposit_request is not None def _delete_archives(self, collection_name, deposit_id): """Delete archives reference from the deposit id. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'The deposit %s does not exist' % deposit_id) DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types[ARCHIVE_KEY]).delete() return {} def _delete_deposit(self, collection_name, deposit_id): """Delete deposit reference. Args: collection_name (str): Client's name deposit_id (id): The deposit to delete Returns Empty dict when ok. Dict with error key to describe the failure. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'The deposit %s does not exist' % deposit_id) if deposit.collection.name != collection_name: summary = 'Cannot delete a deposit from another collection' description = "Deposit %s does not belong to the collection %s" % ( deposit_id, collection_name) return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description) DepositRequest.objects.filter(deposit=deposit).delete() deposit.delete() return {} def _check_preconditions_on(self, filehandler, md5sum, content_length=None): """Check preconditions on provided file are respected. That is the length and/or the md5sum hash match the file's content. Args: filehandler (InMemoryUploadedFile): The file to check md5sum (hex str): md5 hash expected from the file's content content_length (int): the expected length if provided. Returns: Either none if no error or a dictionary with a key error detailing the problem. """ if content_length: if content_length > self.config['max_upload_size']: return make_error_dict( MAX_UPLOAD_SIZE_EXCEEDED, 'Upload size limit exceeded (max %s bytes).' % self.config['max_upload_size'], 'Please consider sending the archive in ' 'multiple steps.') length = filehandler.size if length != content_length: return make_error_dict(status.HTTP_412_PRECONDITION_FAILED, 'Wrong length') if md5sum: _md5sum = self._compute_md5(filehandler) if _md5sum != md5sum: return make_error_dict( CHECKSUM_MISMATCH, 'Wrong md5 hash', 'The checksum sent %s and the actual checksum ' '%s does not match.' % (hashutil.hash_to_hex(md5sum), hashutil.hash_to_hex(_md5sum))) return None def _binary_upload(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Binary upload routine. Other than such a request, a 415 response is returned. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 413 (request entity too large) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided """ content_length = headers['content-length'] if not content_length: return make_error_dict( BAD_REQUEST, 'CONTENT_LENGTH header is mandatory', 'For archive deposit, the ' 'CONTENT_LENGTH header must be sent.') content_disposition = headers['content-disposition'] if not content_disposition: return make_error_dict( BAD_REQUEST, 'CONTENT_DISPOSITION header is mandatory', 'For archive deposit, the ' 'CONTENT_DISPOSITION header must be sent.') packaging = headers['packaging'] if packaging and packaging not in ACCEPT_PACKAGINGS: return make_error_dict( BAD_REQUEST, 'Only packaging %s is supported' % ACCEPT_PACKAGINGS, 'The packaging provided %s is not supported' % packaging) filehandler = req.FILES['file'] precondition_status_response = self._check_preconditions_on( filehandler, headers['content-md5sum'], content_length) if precondition_status_response: return precondition_status_response external_id = headers['slug'] # actual storage of data archive_metadata = filehandler deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) self._deposit_request_put( deposit, {ARCHIVE_KEY: archive_metadata}, replace_metadata=replace_metadata, replace_archives=replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'status': deposit.status, 'archive': filehandler.name, } def _multipart_upload(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Multipart upload supported with exactly: - 1 archive (zip) - 1 atom entry Other than such a request, a 415 response is returned. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 413 (request entity too large) if the length of the archive exceeds the max size configured - 415 (unsupported media type) if a wrong media type is provided """ external_id = headers['slug'] content_types_present = set() data = { 'application/zip': None, # expected either zip 'application/x-tar': None, # or x-tar 'application/atom+xml': None, } for key, value in req.FILES.items(): fh = value if fh.content_type in content_types_present: return make_error_dict( ERROR_CONTENT, 'Only 1 application/zip (or application/x-tar) archive ' 'and 1 atom+xml entry is supported (as per sword2.0 ' 'specification)', 'You provided more than 1 application/(zip|x-tar) ' 'or more than 1 application/atom+xml content-disposition ' 'header in the multipart deposit') content_types_present.add(fh.content_type) data[fh.content_type] = fh if len(content_types_present) != 2: return make_error_dict( ERROR_CONTENT, 'You must provide both 1 application/zip (or ' 'application/x-tar) and 1 atom+xml entry for multipart ' 'deposit', 'You need to provide only 1 application/(zip|x-tar) ' 'and 1 application/atom+xml content-disposition header ' 'in the multipart deposit') filehandler = data['application/zip'] if not filehandler: filehandler = data['application/x-tar'] precondition_status_response = self._check_preconditions_on( filehandler, headers['content-md5sum']) if precondition_status_response: return precondition_status_response # actual storage of data atom_metadata = parse_xml(data['application/atom+xml']) deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) deposit_request_data = { ARCHIVE_KEY: filehandler, METADATA_KEY: atom_metadata, } self._deposit_request_put( deposit, deposit_request_data, replace_metadata, replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'archive': filehandler.name, 'status': deposit.status, } def _atom_entry(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Atom entry deposit. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id: deposit id associated to the deposit - deposit_date: date of the deposit - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ if not req.data: return make_error_dict( BAD_REQUEST, 'Empty body request is not supported', 'Atom entry deposit is supposed to send for metadata. ' 'If the body is empty, there is no metadata.') - external_id = req.data.get( - '{http://www.w3.org/2005/Atom}external_identifier', - headers['slug']) + external_id = req.data.get('external_identifier', headers['slug']) deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) self._deposit_request_put( deposit, {METADATA_KEY: req.data}, replace_metadata, replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'archive': None, 'status': deposit.status, } def _empty_post(self, req, headers, collection_name, deposit_id): """Empty post to finalize an empty deposit. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier Returns: Dictionary of result with the deposit's id, the date it was completed and no archive. """ deposit = Deposit.objects.get(pk=deposit_id) deposit.complete_date = timezone.now() deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return { 'deposit_id': deposit_id, 'deposit_date': deposit.complete_date, 'status': deposit.status, 'archive': None, } def _make_iris(self, req, collection_name, deposit_id): """Define the IRI endpoints Args: req (Request): The initial request collection_name (str): client/collection's name deposit_id (id): Deposit identifier Returns: Dictionary of keys with the iris' urls. """ args = [collection_name, deposit_id] return { iri: req.build_absolute_uri(reverse(iri, args=args)) for iri in [EM_IRI, EDIT_SE_IRI, CONT_FILE_IRI, STATE_IRI] } def additional_checks(self, req, headers, collection_name, deposit_id=None): """Permit the child class to enrich additional checks. Returns: dict with 'error' detailing the problem. """ return {} def checks(self, req, collection_name, deposit_id=None): try: self._collection = DepositCollection.objects.get( name=collection_name) except DepositCollection.DoesNotExist: return make_error_dict( NOT_FOUND, 'Unknown collection name %s' % collection_name) username = req.user.username if username: # unauthenticated request can have the username empty try: self._client = DepositClient.objects.get(username=username) except DepositClient.DoesNotExist: return make_error_dict(NOT_FOUND, 'Unknown client name %s' % username) if self._collection.id not in self._client.collections: return make_error_dict( FORBIDDEN, 'Client %s cannot access collection %s' % ( username, collection_name)) if deposit_id: try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'Deposit with id %s does not exist' % deposit_id) checks = self.restrict_access(req, deposit) if checks: return checks headers = self._read_headers(req) if headers['on-behalf-of']: return make_error_dict(MEDIATION_NOT_ALLOWED, 'Mediation is not supported.') checks = self.additional_checks(req, headers, collection_name, deposit_id) if 'error' in checks: return checks return {'headers': headers} def restrict_access(self, req, deposit=None): if deposit: if (req.method != 'GET' and deposit.status != DEPOSIT_STATUS_PARTIAL): summary = "You can only act on deposit with status '%s'" % ( DEPOSIT_STATUS_PARTIAL, ) description = "This deposit has status '%s'" % deposit.status return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description) def _basic_not_allowed_method(self, req, method): return make_error_response( req, METHOD_NOT_ALLOWED, '%s method is not supported on this endpoint' % method) def get(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'GET') def post(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'POST') def put(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'PUT') def delete(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'DELETE') class SWHGetDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support GET method. """ def get(self, req, collection_name, deposit_id, format=None): """Endpoint to create/add resources to deposit. Returns: 200 response when no error during routine occurred 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) r = self.process_get( req, collection_name, deposit_id) if isinstance(r, tuple): status, content, content_type = r return HttpResponse(content, status=status, content_type=content_type) return r @abstractmethod def process_get(self, req, collection_name, deposit_id): """Routine to deal with the deposit's get processing. Returns: Tuple status, stream of content, content-type """ pass class SWHPostDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def post(self, req, collection_name, deposit_id=None, format=None): """Endpoint to create/add resources to deposit. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) headers = checks['headers'] _status, _iri_key, data = self.process_post( req, headers, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) data['packagings'] = ACCEPT_PACKAGINGS iris = self._make_iris(req, collection_name, data['deposit_id']) data.update(iris) response = render(req, 'deposit/deposit_receipt.xml', context=data, content_type='application/xml', status=_status) response._headers['location'] = 'Location', data[_iri_key] return response @abstractmethod def process_post(self, req, headers, collection_name, deposit_id=None): """Routine to deal with the deposit's processing. Returns Tuple of: - response status code (200, 201, etc...) - key iri (EM_IRI, EDIT_SE_IRI, etc...) - dictionary of the processing result """ pass class SWHPutDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support PUT method. """ def put(self, req, collection_name, deposit_id, format=None): """Endpoint to update deposit resources. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) headers = checks['headers'] data = self.process_put(req, headers, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_put(self, req, headers, collection_name, deposit_id): """Routine to deal with updating a deposit in some way. Returns dictionary of the processing result """ pass class SWHDeleteDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def delete(self, req, collection_name, deposit_id): """Endpoint to delete some deposit's resources (archives, deposit). Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) data = self.process_delete(req, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_delete(self, req, collection_name, deposit_id): """Routine to delete a resource. This is mostly not allowed except for the EM_IRI (cf. .api.deposit_update.SWHUpdateArchiveDeposit) """ pass diff --git a/swh/deposit/api/deposit_update.py b/swh/deposit/api/deposit_update.py index 4a0d5975..42af22e0 100644 --- a/swh/deposit/api/deposit_update.py +++ b/swh/deposit/api/deposit_update.py @@ -1,155 +1,155 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework import status from .common import SWHPostDepositAPI, SWHPutDepositAPI, SWHDeleteDepositAPI from .common import ACCEPT_ARCHIVE_CONTENT_TYPES from ..config import CONT_FILE_IRI, EDIT_SE_IRI, EM_IRI -from ..errors import make_error_response, BAD_REQUEST +from ..errors import make_error_dict, BAD_REQUEST from ..parsers import SWHFileUploadZipParser, SWHFileUploadTarParser from ..parsers import SWHAtomEntryParser from ..parsers import SWHMultiPartParser class SWHUpdateArchiveDeposit(SWHPostDepositAPI, SWHPutDepositAPI, SWHDeleteDepositAPI): """Deposit request class defining api endpoints for sword deposit. What's known as 'EM IRI' in the sword specification. HTTP verbs supported: PUT, POST, DELETE """ parser_classes = (SWHFileUploadZipParser, SWHFileUploadTarParser, ) def process_put(self, req, headers, collection_name, deposit_id): """Replace existing content for the existing deposit. source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_editingcontent_binary Returns: 204 No content """ if req.content_type not in ACCEPT_ARCHIVE_CONTENT_TYPES: msg = 'Packaging format supported is restricted to %s' % ( ', '.join(ACCEPT_ARCHIVE_CONTENT_TYPES)) - return make_error_response(req, BAD_REQUEST, msg) + return make_error_dict(BAD_REQUEST, msg) return self._binary_upload(req, headers, collection_name, deposit_id=deposit_id, replace_archives=True) def process_post(self, req, headers, collection_name, deposit_id): """Add new content to the existing deposit. source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_addingcontent_mediaresource Returns: 201 Created Headers: Location: [Cont-File-IRI] Body: [optional Deposit Receipt] """ if req.content_type not in ACCEPT_ARCHIVE_CONTENT_TYPES: msg = 'Packaging format supported is restricted to %s' % ( ', '.join(ACCEPT_ARCHIVE_CONTENT_TYPES)) - return make_error_response(req, BAD_REQUEST, msg) + return 'unused', 'unused', make_error_dict(BAD_REQUEST, msg) return (status.HTTP_201_CREATED, CONT_FILE_IRI, self._binary_upload(req, headers, collection_name, deposit_id)) def process_delete(self, req, collection_name, deposit_id): """Delete content (archives) from existing deposit. source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_deletingcontent Returns: 204 Created """ return self._delete_archives(collection_name, deposit_id) class SWHUpdateMetadataDeposit(SWHPostDepositAPI, SWHPutDepositAPI, SWHDeleteDepositAPI): """Deposit request class defining api endpoints for sword deposit. What's known as 'Edit IRI' (and SE IRI) in the sword specification. HTTP verbs supported: POST (SE IRI), PUT (Edit IRI), DELETE """ parser_classes = (SWHMultiPartParser, SWHAtomEntryParser) def process_put(self, req, headers, collection_name, deposit_id): """Replace existing deposit's metadata/archive with new ones. source: - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_editingcontent_metadata - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_editingcontent_multipart Returns: 204 No content """ if req.content_type.startswith('multipart/'): return self._multipart_upload(req, headers, collection_name, deposit_id=deposit_id, replace_archives=True, replace_metadata=True) return self._atom_entry(req, headers, collection_name, deposit_id=deposit_id, replace_metadata=True) def process_post(self, req, headers, collection_name, deposit_id): """Add new metadata/archive to existing deposit. source: - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_addingcontent_metadata - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_addingcontent_multipart This also deals with an empty post corner case to finalize a deposit. Returns: In optimal case for a multipart and atom-entry update, a 201 Created response. The body response will hold a deposit. And the response headers will contain an entry 'Location' with the EM-IRI. For the empty post case, this returns a 200. """ if req.content_type.startswith('multipart/'): return (status.HTTP_201_CREATED, EM_IRI, self._multipart_upload(req, headers, collection_name, deposit_id=deposit_id)) # check for final empty post # source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html # #continueddeposit_complete if headers['content-length'] == 0 and headers['in-progress'] is False: data = self._empty_post(req, headers, collection_name, deposit_id) return (status.HTTP_200_OK, EDIT_SE_IRI, data) return (status.HTTP_201_CREATED, EM_IRI, self._atom_entry(req, headers, collection_name, deposit_id=deposit_id)) def process_delete(self, req, collection_name, deposit_id): """Delete the container (deposit). Source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_deleteconteiner """ return self._delete_deposit(collection_name, deposit_id) diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py index 57c3b374..f145a9de 100644 --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -1,237 +1,237 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import os import shutil import tempfile from contextlib import contextmanager from django.http import FileResponse from rest_framework import status from swh.core import tarball from swh.model import identifiers from ...config import SWH_PERSON from ..common import SWHGetDepositAPI, SWHPrivateAPIView from ...models import Deposit, DepositRequest @contextmanager def aggregate_tarballs(extraction_dir, archive_paths): """Aggregate multiple tarballs into one and returns this new archive's path. Args: extraction_dir (path): Path to use for the tarballs computation archive_paths ([str]): Deposit's archive paths Returns: Tuple (directory to clean up, archive path (aggregated or not)) """ if len(archive_paths) > 1: # need to rebuild one archive # from multiple ones os.makedirs(extraction_dir, 0o755, exist_ok=True) dir_path = tempfile.mkdtemp(prefix='swh.deposit-', dir=extraction_dir) # root folder to build an aggregated tarball aggregated_tarball_rootdir = os.path.join(dir_path, 'aggregate') os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True) # uncompress in a temporary location all archives for archive_path in archive_paths: tarball.uncompress(archive_path, aggregated_tarball_rootdir) # Aggregate into one big tarball the multiple smaller ones temp_tarpath = tarball.compress( aggregated_tarball_rootdir + '.zip', nature='zip', dirpath_or_files=aggregated_tarball_rootdir) # can already clean up temporary directory shutil.rmtree(aggregated_tarball_rootdir) try: yield temp_tarpath finally: shutil.rmtree(dir_path) else: # only 1 archive, no need to do fancy actions (and no cleanup step) yield archive_paths[0] class SWHDepositReadArchives(SWHGetDepositAPI, SWHPrivateAPIView): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ ADDITIONAL_CONFIG = { 'extraction_dir': ('str', '/tmp/swh-deposit/archive/'), } def __init__(self): super().__init__() self.extraction_dir = self.config['extraction_dir'] if not os.path.exists(self.extraction_dir): os.makedirs(self.extraction_dir) def retrieve_archives(self, deposit_id): """Given a deposit identifier, returns its associated archives' path. Yields: path to deposited archives """ deposit = Deposit.objects.get(pk=deposit_id) deposit_requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive']).order_by('id') for deposit_request in deposit_requests: yield deposit_request.archive.path def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ archive_paths = list(self.retrieve_archives(deposit_id)) with aggregate_tarballs(self.extraction_dir, archive_paths) as path: return FileResponse(open(path, 'rb'), status=status.HTTP_200_OK, content_type='application/octet-stream') class SWHDepositReadMetadata(SWHGetDepositAPI, SWHPrivateAPIView): """Class in charge of aggregating metadata on a deposit. """ ADDITIONAL_CONFIG = { 'provider': ('dict', { # 'provider_name': '', # those are not set since read from the # 'provider_url': '', # deposit's client 'provider_type': 'deposit_client', 'metadata': {} }), 'tool': ('dict', { 'name': 'swh-deposit', 'version': '0.0.1', 'configuration': { 'sword_version': '2' } }) } def __init__(self): super().__init__() self.provider = self.config['provider'] self.tool = self.config['tool'] def _aggregate_metadata(self, deposit, metadata_requests): """Retrieve and aggregates metadata information. """ metadata = {} for req in metadata_requests: metadata.update(req.metadata) return metadata def _retrieve_url(self, deposit, metadata): client_domain = deposit.client.domain for field in metadata: if 'url' in field: if client_domain in metadata[field]: return metadata[field] def aggregate(self, deposit, requests): """Aggregate multiple data on deposit into one unified data dictionary. Args: deposit (Deposit): Deposit concerned by the data aggregation. requests ([DepositRequest]): List of associated requests which need aggregation. Returns: Dictionary of data representing the deposit to inject in swh. """ data = {} # Retrieve tarballs/metadata information metadata = self._aggregate_metadata(deposit, requests) # create origin_url from metadata only after deposit_check validates it origin_url = self._retrieve_url(deposit, metadata) # Read information metadata data['origin'] = { 'type': 'deposit', 'url': origin_url } # revision fullname = deposit.client.username author_committer = SWH_PERSON # metadata provider self.provider['provider_name'] = deposit.client.last_name self.provider['provider_url'] = deposit.client.provider_url revision_type = 'tar' revision_msg = '%s: Deposit %s in collection %s' % ( fullname, deposit.id, deposit.collection.name) complete_date = identifiers.normalize_timestamp(deposit.complete_date) data['revision'] = { 'synthetic': True, 'date': complete_date, 'committer_date': complete_date, 'author': author_committer, 'committer': author_committer, 'type': revision_type, 'message': revision_msg, 'metadata': metadata, } if deposit.parent: swh_persistent_id = deposit.parent.swh_id persistent_identifier = identifiers.parse_persistent_identifier( swh_persistent_id) - parent_revision = persistent_identifier['object_id'] + parent_revision = persistent_identifier.object_id data['revision']['parents'] = [parent_revision] data['branch_name'] = 'master' data['origin_metadata'] = { 'provider': self.provider, 'tool': self.tool, 'metadata': metadata } return data def process_get(self, req, collection_name, deposit_id): deposit = Deposit.objects.get(pk=deposit_id) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata']) data = self.aggregate(deposit, requests) d = {} if data: d = json.dumps(data) return status.HTTP_200_OK, d, 'application/json' diff --git a/swh/deposit/parsers.py b/swh/deposit/parsers.py index 5c4198c3..ce231c87 100644 --- a/swh/deposit/parsers.py +++ b/swh/deposit/parsers.py @@ -1,68 +1,76 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of defining parsers with SWORD 2.0 supported mediatypes. """ -from decimal import Decimal +import xmltodict + +from django.conf import settings +from rest_framework.parsers import BaseParser from rest_framework.parsers import FileUploadParser from rest_framework.parsers import MultiPartParser -from rest_framework_xml.parsers import XMLParser class SWHFileUploadZipParser(FileUploadParser): """File upload parser limited to zip archive. """ media_type = 'application/zip' class SWHFileUploadTarParser(FileUploadParser): - """File upload parser limited to zip archive. + """File upload parser limited to tarball (tar, tar.gz, tar.*) archives. """ media_type = 'application/x-tar' -class SWHXMLParser(XMLParser): - def _type_convert(self, value): - """Override the default type converter to avoid having decimal in the - resulting output. +class SWHXMLParser(BaseParser): + """ + XML parser. + """ + media_type = 'application/xml' + def parse(self, stream, media_type=None, parser_context=None): """ - value = super()._type_convert(value) - if isinstance(value, Decimal): - value = str(value) - - return value + Parses the incoming bytestream as XML and returns the resulting data. + """ + parser_context = parser_context or {} + encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET) + data = xmltodict.parse(stream, encoding=encoding, + process_namespaces=False) + if 'entry' in data: + data = data['entry'] + return data class SWHAtomEntryParser(SWHXMLParser): """Atom entry parser limited to specific mediatype """ media_type = 'application/atom+xml;type=entry' class SWHMultiPartParser(MultiPartParser): """Multipart parser limited to a subset of mediatypes. """ media_type = 'multipart/*; *' def parse_xml(raw_content): """Parse xml body. Args: raw_content (bytes): The content to parse Returns: content parsed as dict. """ return SWHXMLParser().parse(raw_content) diff --git a/swh/deposit/tests/api/test_deposit.py b/swh/deposit/tests/api/test_deposit.py index 81c78b3d..507bafdd 100644 --- a/swh/deposit/tests/api/test_deposit.py +++ b/swh/deposit/tests/api/test_deposit.py @@ -1,163 +1,167 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib from django.core.urlresolvers import reverse from io import BytesIO from nose.tools import istest, nottest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.config import COL_IRI, EDIT_SE_IRI, DEPOSIT_STATUS_REJECTED from swh.deposit.config import DEPOSIT_STATUS_PARTIAL from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS from swh.deposit.config import DEPOSIT_STATUS_LOAD_FAILURE from swh.deposit.models import Deposit, DepositClient, DepositCollection from swh.deposit.parsers import parse_xml from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine class DepositNoAuthCase(APITestCase, BasicTestCase): """Deposit access are protected with basic authentication. """ @istest def post_will_fail_with_401(self): """Without authentication, endpoint refuses access with 401 response """ url = reverse(COL_IRI, args=[self.collection.name]) # when response = self.client.post(url) # then self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) class DepositFailuresTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): """Deposit access are protected with basic authentication. """ def setUp(self): super().setUp() # Add another user _collection2 = DepositCollection(name='some') _collection2.save() _user = DepositClient.objects.create_user(username='user', password='user') _user.collections = [_collection2.id] self.collection2 = _collection2 @istest def access_to_another_user_collection_is_forbidden(self): """Access to another user collection should return a 403 """ url = reverse(COL_IRI, args=[self.collection2.name]) response = self.client.post(url) self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + self.assertRegex(response.content.decode('utf-8'), + 'Client hal cannot access collection %s' % ( + self.collection2.name, )) @istest def delete_on_col_iri_not_supported(self): """Delete on col iri should return a 405 response """ url = reverse(COL_IRI, args=[self.collection.name]) response = self.client.delete(url) self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) + self.assertRegex(response.content.decode('utf-8'), + 'DELETE method is not supported on this endpoint') @nottest def create_deposit_with_rejection_status(self): url = reverse(COL_IRI, args=[self.collection.name]) data = b'some data which is clearly not a zip file' md5sum = hashlib.md5(data).hexdigest() external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=data, # + headers CONTENT_LENGTH=len(data), # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, HTTP_CONTENT_MD5=md5sum, HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') self.assertEquals(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) - actual_state = response_content[ - '{http://www.w3.org/2005/Atom}deposit_status'] + actual_state = response_content['deposit_status'] self.assertEquals(actual_state, DEPOSIT_STATUS_REJECTED) @istest def act_on_deposit_rejected_is_not_permitted(self): deposit_id = self.create_deposit_with_status(DEPOSIT_STATUS_REJECTED) deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_REJECTED response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data1, HTTP_SLUG='external-id') self.assertEquals(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertRegex( response.content.decode('utf-8'), "You can only act on deposit with status '%s'" % ( DEPOSIT_STATUS_PARTIAL, )) @istest def add_deposit_with_parent(self): # given multiple deposit already loaded deposit_id = self.create_deposit_with_status( status=DEPOSIT_STATUS_LOAD_SUCCESS, external_id='some-external-id') deposit1 = Deposit.objects.get(pk=deposit_id) self.assertIsNotNone(deposit1) self.assertEquals(deposit1.external_id, 'some-external-id') self.assertEquals(deposit1.status, DEPOSIT_STATUS_LOAD_SUCCESS) deposit_id2 = self.create_deposit_with_status( status=DEPOSIT_STATUS_LOAD_SUCCESS, external_id='some-external-id') deposit2 = Deposit.objects.get(pk=deposit_id2) self.assertIsNotNone(deposit2) self.assertEquals(deposit2.external_id, 'some-external-id') self.assertEquals(deposit2.status, DEPOSIT_STATUS_LOAD_SUCCESS) deposit_id3 = self.create_deposit_with_status( status=DEPOSIT_STATUS_LOAD_FAILURE, external_id='some-external-id') deposit3 = Deposit.objects.get(pk=deposit_id3) self.assertIsNotNone(deposit3) self.assertEquals(deposit3.external_id, 'some-external-id') self.assertEquals(deposit3.status, DEPOSIT_STATUS_LOAD_FAILURE) # when deposit_id3 = self.create_simple_deposit_partial( external_id='some-external-id') # then deposit4 = Deposit.objects.get(pk=deposit_id3) self.assertIsNotNone(deposit4) self.assertEquals(deposit4.external_id, 'some-external-id') self.assertEquals(deposit4.status, DEPOSIT_STATUS_PARTIAL) self.assertEquals(deposit4.parent, deposit2) diff --git a/swh/deposit/tests/api/test_deposit_atom.py b/swh/deposit/tests/api/test_deposit_atom.py index 2d68ac62..09ba9a8b 100644 --- a/swh/deposit/tests/api/test_deposit_atom.py +++ b/swh/deposit/tests/api/test_deposit_atom.py @@ -1,524 +1,517 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.urlresolvers import reverse from io import BytesIO from nose.tools import istest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED from swh.deposit.models import Deposit, DepositRequest from swh.deposit.parsers import parse_xml from ..common import BasicTestCase, WithAuthTestCase class DepositAtomEntryTestCase(APITestCase, WithAuthTestCase, BasicTestCase): """Try and post atom entry deposit. """ def setUp(self): super().setUp() self.atom_entry_data0 = b""" Awesome Compiler hal urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a %s 2017-10-07T15:17:08Z some awesome author something awesome-compiler This is an awesome compiler destined to awesomely compile stuff and other stuff compiler,programming,language 2005-10-07T17:17:08Z 2005-10-07T17:17:08Z release note related link Awesome https://hoster.org/awesome-compiler GNU/Linux 0.0.1 running all """ self.atom_entry_data1 = b""" hal urn:uuid:2225c695-cfb8-4ebb-aaaa-80da344efa6a 2017-10-07T15:17:08Z some awesome author something awesome-compiler This is an awesome compiler destined to awesomely compile stuff and other stuff compiler,programming,language 2005-10-07T17:17:08Z 2005-10-07T17:17:08Z release note related link Awesome https://hoster.org/awesome-compiler GNU/Linux 0.0.1 running all """ self.atom_entry_data2 = b""" %s """ self.atom_entry_data_empty_body = b""" """ self.atom_entry_data3 = b""" something """ self.atom_entry_data_atom_only = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author """ self.atom_entry_data_codemeta = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 1785io25c695 origin url other identifier, DOI, ARK Domain description key-word 1 key-word 2 creation date publication date comment article name article id Collaboration/Projet project name id see also Sponsor A Sponsor B Platform/OS dependencies Version active license url spdx .Net Framework 3.0 Python2.3 author1 Inria UPMC author2 Inria UPMC http://code.com language 1 language 2 http://issuetracker.com """ # noqa self.atom_entry_data_dc_codemeta = b""" %s hal-01587361 https://hal.inria.fr/hal-01587361 https://hal.inria.fr/hal-01587361/document https://hal.inria.fr/hal-01587361/file/AffectationRO-v1.0.0.zip doi:10.5281/zenodo.438684 The assignment problem AffectationRO Gruenpeter, Morane [INFO] Computer Science [cs] [INFO.INFO-RO] Computer Science [cs]/Operations Research [cs.RO] SOFTWARE Project in OR: The assignment problemA java implementation for the assignment problem first release description fr 2015-06-01 2017-10-19 en url stable Version sur hal Version entre par lutilisateur Mots-cls Commentaire Rfrence interne Collaboration/Projet nom du projet id Voir aussi Financement Projet ANR Projet Europen Platform/OS Dpendances Etat du dveloppement license url spdx Outils de dveloppement- outil no1 Outils de dveloppement- outil no2 http://code.com language 1 language 2 """ # noqa self.atom_entry_tei = b"""HAL TEI export of hal-01587083CCSDDistributed under a Creative Commons Attribution 4.0 International License

HAL API platform

questionnaire software metadataMoraneGruenpeter7de56c632362954fa84172cad80afe4einria.fr1556733MoraneGruenpeterf85a43a5fb4a2e0778a77e017f28c8fdgmail.com2017-09-29 11:21:322017-10-03 17:20:132017-10-03 17:20:132017-09-292017-09-29contributorMoraneGruenpeterf85a43a5fb4a2e0778a77e017f28c8fdgmail.comCCSDhal-01587083https://hal.inria.fr/hal-01587083gruenpeter:hal-0158708320172017questionnaire software metadataMoraneGruenpeter7de56c632362954fa84172cad80afe4einria.fr1556733EnglishComputer Science [cs]SoftwareIRILLInitiative pour la Recherche et l'Innovation sur le Logiciel Libre
https://www.irill.org/
Universite Pierre et Marie Curie - Paris 6UPMC
4 place Jussieu - 75005 Paris
http://www.upmc.fr/
Institut National de Recherche en Informatique et en AutomatiqueInria
Domaine de VoluceauRocquencourt - BP 10578153 Le Chesnay Cedex
http://www.inria.fr/en/
Universite Paris Diderot - Paris 7UPD7
5 rue Thomas-Mann - 75205 Paris cedex 13
http://www.univ-paris-diderot.fr
""" # noqa self.atom_entry_data_badly_formatted = b""" """ self.atom_error_with_decimal = b""" Composing a Web of Audio Applications hal hal-01243065 hal-01243065 https://hal-test.archives-ouvertes.fr/hal-01243065 test DSP programming,Web,Composability,Faust 2017-05-03T16:08:47+02:00 The Web offers a great opportunity to share, deploy and use programs without installation difficulties. In this article we explore the idea of freely combining/composing real-time audio applications deployed on the Web using Faust audio DSP language. 1 10.4 phpstorm stable linux php python C GNU General Public License v3.0 only CeCILL Free Software License Agreement v1.1 HAL hal@ccsd.cnrs.fr Someone Nice someone@nice.fr FFJ """ # noqa @istest def post_deposit_atom_entry_serialization_error(self): """Posting an initial atom entry should return 201 with deposit receipt """ # given # when response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=self.atom_error_with_decimal, HTTP_SLUG='external-id', HTTP_IN_PROGRESS='false') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) dr = DepositRequest.objects.get(deposit=deposit) self.assertIsNotNone(dr.metadata) - sw_version = dr.metadata.get( - '{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}softwareVersion') + sw_version = dr.metadata.get('codemeta:softwareVersion') self.assertEquals(sw_version, '10.4') @istest def post_deposit_atom_empty_body_request(self): """Posting empty body request should return a 400 response """ response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data_empty_body) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) @istest def post_deposit_atom_badly_formatted_is_a_bad_request(self): """Posting a badly formatted atom should return a 400 response """ response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data_badly_formatted) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) @istest def post_deposit_atom_without_slug_header_is_bad_request(self): """Posting an atom entry without a slug header should return a 400 """ url = reverse(COL_IRI, args=[self.collection.name]) # when response = self.client.post( url, content_type='application/atom+xml;type=entry', data=self.atom_entry_data0, # + headers HTTP_IN_PROGRESS='false') self.assertIn(b'Missing SLUG header', response.content) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) @istest def post_deposit_atom_unknown_collection(self): """Posting an atom entry to an unknown collection should return a 404 """ response = self.client.post( reverse(COL_IRI, args=['unknown-one']), content_type='application/atom+xml;type=entry', data=self.atom_entry_data3, HTTP_SLUG='something') self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) @istest def post_deposit_atom_entry_initial(self): """Posting an initial atom entry should return 201 with deposit receipt """ # given external_id = 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a' with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) atom_entry_data = self.atom_entry_data0 % external_id.encode('utf-8') # when response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=atom_entry_data, HTTP_SLUG='external-id', HTTP_IN_PROGRESS='false') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.client, self.user) # one associated request to a deposit deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertIsNotNone(deposit_request.metadata) self.assertFalse(bool(deposit_request.archive)) @istest def post_deposit_atom_entry_with_codemeta(self): """Posting an initial atom entry should return 201 with deposit receipt """ # given external_id = 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a' with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) atom_entry_data = self.atom_entry_data_dc_codemeta % ( external_id.encode('utf-8'), ) # when response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=atom_entry_data, HTTP_SLUG='external-id', HTTP_IN_PROGRESS='false') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.client, self.user) # one associated request to a deposit deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertIsNotNone(deposit_request.metadata) self.assertFalse(bool(deposit_request.archive)) @istest def test_post_deposit_atom_entry_tei(self): """Posting initial atom entry as TEI should return 201 with receipt """ # given external_id = 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a' with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) atom_entry_data = self.atom_entry_tei # when response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=atom_entry_data, HTTP_SLUG=external_id, HTTP_IN_PROGRESS='false') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.client, self.user) # one associated request to a deposit deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertIsNotNone(deposit_request.metadata) self.assertFalse(bool(deposit_request.archive)) @istest def post_deposit_atom_entry_multiple_steps(self): """After initial deposit, updating a deposit should return a 201 """ # given external_id = 'urn:uuid:2225c695-cfb8-4ebb-aaaa-80da344efa6a' with self.assertRaises(Deposit.DoesNotExist): deposit = Deposit.objects.get(external_id=external_id) # when response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data1, HTTP_IN_PROGRESS='True', HTTP_SLUG=external_id) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = int(response_content['deposit_id']) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.status, 'partial') self.assertEqual(deposit.client, self.user) # one associated request to a deposit deposit_requests = DepositRequest.objects.filter(deposit=deposit) self.assertEqual(len(deposit_requests), 1) atom_entry_data = self.atom_entry_data2 % external_id.encode('utf-8') update_uri = response._headers['location'][1] # when updating the first deposit post response = self.client.post( update_uri, content_type='application/atom+xml;type=entry', data=atom_entry_data, HTTP_IN_PROGRESS='False') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = int(response_content['deposit_id']) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.client, self.user) self.assertEqual(len(Deposit.objects.all()), 1) # now 2 associated requests to a same deposit deposit_requests = DepositRequest.objects.filter(deposit=deposit) self.assertEqual(len(deposit_requests), 2) for deposit_request in deposit_requests: actual_metadata = deposit_request.metadata self.assertIsNotNone(actual_metadata) self.assertFalse(bool(deposit_request.archive)) diff --git a/swh/deposit/tests/api/test_deposit_binary.py b/swh/deposit/tests/api/test_deposit_binary.py index 89d876c8..e28ba807 100644 --- a/swh/deposit/tests/api/test_deposit_binary.py +++ b/swh/deposit/tests/api/test_deposit_binary.py @@ -1,660 +1,652 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.files.uploadedfile import InMemoryUploadedFile from django.core.urlresolvers import reverse from io import BytesIO from nose.tools import istest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.tests import TEST_CONFIG from swh.deposit.config import COL_IRI, EM_IRI from swh.deposit.config import DEPOSIT_STATUS_DEPOSITED from swh.deposit.models import Deposit, DepositRequest from swh.deposit.parsers import parse_xml from ..common import BasicTestCase, WithAuthTestCase, create_arborescence_zip from ..common import FileSystemCreationRoutine class DepositTestCase(APITestCase, WithAuthTestCase, BasicTestCase, FileSystemCreationRoutine): """Try and upload one single deposit """ def setUp(self): super().setUp() self.atom_entry_data0 = b""" Awesome Compiler hal urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a %s 2017-10-07T15:17:08Z some awesome author something awesome-compiler This is an awesome compiler destined to awesomely compile stuff and other stuff compiler,programming,language 2005-10-07T17:17:08Z 2005-10-07T17:17:08Z release note related link Awesome https://hoster.org/awesome-compiler GNU/Linux 0.0.1 running all """ self.atom_entry_data1 = b""" hal urn:uuid:2225c695-cfb8-4ebb-aaaa-80da344efa6a 2017-10-07T15:17:08Z some awesome author something awesome-compiler This is an awesome compiler destined to awesomely compile stuff and other stuff compiler,programming,language 2005-10-07T17:17:08Z 2005-10-07T17:17:08Z release note related link Awesome https://hoster.org/awesome-compiler GNU/Linux 0.0.1 running all """ self.atom_entry_data2 = b""" %s """ self.atom_entry_data_empty_body = b""" """ self.atom_entry_data3 = b""" something """ self.data_atom_entry_ok = b""" Title urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2005-10-07T17:17:08Z Contributor The abstract The abstract Access Rights Alternative Title Date Available Bibliographic Citation # noqa Contributor Description Has Part Has Version Identifier Is Part Of Publisher References Rights Holder Source Title Type """ @istest def post_deposit_binary_without_slug_header_is_bad_request(self): """Posting a binary deposit without slug header should return 400 """ url = reverse(COL_IRI, args=[self.collection.name]) # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') self.assertIn(b'Missing SLUG header', response.content) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) @istest def post_deposit_binary_upload_final_and_status_check(self): """Binary upload with correct headers should return 201 with receipt """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive['name'], )) # then response_content = parse_xml(BytesIO(response.content)) self.assertEqual(response.status_code, status.HTTP_201_CREATED) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertEquals(deposit_request.deposit, deposit) self.assertRegex(deposit_request.archive.name, self.archive['name']) response_content = parse_xml(BytesIO(response.content)) - self.assertEqual( - response_content['{http://www.w3.org/2005/Atom}deposit_archive'], - self.archive['name']) - self.assertEqual( - response_content['{http://www.w3.org/2005/Atom}deposit_id'], - deposit.id) - self.assertEqual( - response_content['{http://www.w3.org/2005/Atom}deposit_status'], - deposit.status) + self.assertEqual(response_content['deposit_archive'], + self.archive['name']) + self.assertEqual(int(response_content['deposit_id']), + deposit.id) + self.assertEqual(response_content['deposit_status'], + deposit.status) edit_se_iri = reverse('edit_se_iri', args=[self.collection.name, deposit.id]) self.assertEqual(response._headers['location'], ('Location', 'http://testserver' + edit_se_iri)) @istest def post_deposit_binary_upload_supports_zip_or_tar(self): """Binary upload with content-type not in [zip,x-tar] should return 415 """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/octet-stream', data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_415_UNSUPPORTED_MEDIA_TYPE) with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) @istest def post_deposit_binary_fails_if_unsupported_packaging_header( self): """Bin deposit without supported content_disposition header returns 400 """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id' # when response = self.client.post( url, content_type='application/zip', data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='something-unsupported', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) @istest def post_deposit_binary_upload_fail_if_no_content_disposition_header( self): """Binary upload without content_disposition header should return 400 """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id' # when response = self.client.post( url, content_type='application/zip', data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false') # then self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) @istest def post_deposit_mediation_not_supported(self): """Binary upload with mediation should return a 412 response """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_ON_BEHALF_OF='someone', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_412_PRECONDITION_FAILED) with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) @istest def post_deposit_binary_upload_fail_if_upload_size_limit_exceeded( self): """Binary upload must not exceed the limit set up... """ # given url = reverse(COL_IRI, args=[self.collection.name]) archive = create_arborescence_zip( self.root_path, 'archive2', 'file2', b'some content in file', up_to_size=TEST_CONFIG['max_upload_size']) external_id = 'some-external-id' # when response = self.client.post( url, content_type='application/zip', data=archive['data'], # + headers CONTENT_LENGTH=archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_413_REQUEST_ENTITY_TOO_LARGE) self.assertRegex(response.content, b'Upload size limit exceeded') with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) @istest def post_deposit_2_post_2_different_deposits(self): """2 posting deposits should return 2 different 201 with receipt """ url = reverse(COL_IRI, args=[self.collection.name]) # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG='some-external-id-1', HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) deposits = Deposit.objects.all() self.assertEqual(len(deposits), 1) self.assertEqual(deposits[0], deposit) # second post response = self.client.post( url, content_type='application/x-tar', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG='another-external-id', HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename1') self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) - deposit_id2 = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id2 = response_content['deposit_id'] deposit2 = Deposit.objects.get(pk=deposit_id2) self.assertNotEqual(deposit, deposit2) deposits = Deposit.objects.all().order_by('id') self.assertEqual(len(deposits), 2) self.assertEqual(list(deposits), [deposit, deposit2]) @istest def post_deposit_binary_and_post_to_add_another_archive(self): """Updating a deposit should return a 201 with receipt """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='true', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive['name'], )) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, 'partial') self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertEquals(deposit_request.deposit, deposit) self.assertEquals(deposit_request.type.name, 'archive') self.assertRegex(deposit_request.archive.name, self.archive['name']) # 2nd archive to upload archive2 = create_arborescence_zip( self.root_path, 'archive2', 'file2', b'some other content in file') # uri to update the content update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) # adding another archive for the deposit and finalizing it response = self.client.post( update_uri, content_type='application/zip', # as zip data=archive2['data'], # + headers CONTENT_LENGTH=archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive2['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( archive2['name'])) self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_requests = list(DepositRequest.objects.filter(deposit=deposit). order_by('id')) # 2 deposit requests for the same deposit self.assertEquals(len(deposit_requests), 2) self.assertEquals(deposit_requests[0].deposit, deposit) self.assertEquals(deposit_requests[0].type.name, 'archive') self.assertRegex(deposit_requests[0].archive.name, self.archive['name']) self.assertEquals(deposit_requests[1].deposit, deposit) self.assertEquals(deposit_requests[1].type.name, 'archive') self.assertRegex(deposit_requests[1].archive.name, archive2['name']) # only 1 deposit in db deposits = Deposit.objects.all() self.assertEqual(len(deposits), 1) @istest def post_deposit_then_post_or_put_is_refused_when_status_ready(self): """Updating a deposit with status 'ready' should return a 400 """ url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertEquals(deposit_request.deposit, deposit) self.assertRegex(deposit_request.archive.name, 'filename0') # updating/adding is forbidden # uri to update the content edit_se_iri = reverse( 'edit_se_iri', args=[self.collection.name, deposit_id]) em_iri = reverse( 'em_iri', args=[self.collection.name, deposit_id]) # Testing all update/add endpoint should fail # since the status is ready archive2 = create_arborescence_zip( self.root_path, 'archive2', 'file2', b'some content in file 2') # replacing file is no longer possible since the deposit's # status is ready r = self.client.put( em_iri, content_type='application/zip', data=archive2['data'], CONTENT_LENGTH=archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive2['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST) # adding file is no longer possible since the deposit's status # is ready r = self.client.post( em_iri, content_type='application/zip', data=archive2['data'], CONTENT_LENGTH=archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive2['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST) # replacing metadata is no longer possible since the deposit's # status is ready r = self.client.put( edit_se_iri, content_type='application/atom+xml;type=entry', data=self.data_atom_entry_ok, CONTENT_LENGTH=len(self.data_atom_entry_ok), HTTP_SLUG=external_id) self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST) # adding new metadata is no longer possible since the # deposit's status is ready r = self.client.post( edit_se_iri, content_type='application/atom+xml;type=entry', data=self.data_atom_entry_ok, CONTENT_LENGTH=len(self.data_atom_entry_ok), HTTP_SLUG=external_id) self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST) archive_content = b'some content representing archive' archive = InMemoryUploadedFile( BytesIO(archive_content), field_name='archive0', name='archive0', content_type='application/zip', size=len(archive_content), charset=None) atom_entry = InMemoryUploadedFile( BytesIO(self.data_atom_entry_ok), field_name='atom0', name='atom0', content_type='application/atom+xml; charset="utf-8"', size=len(self.data_atom_entry_ok), charset='utf-8') # replacing multipart metadata is no longer possible since the # deposit's status is ready r = self.client.put( edit_se_iri, format='multipart', data={ 'archive': archive, 'atom_entry': atom_entry, }) self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST) # adding new metadata is no longer possible since the # deposit's status is ready r = self.client.post( edit_se_iri, format='multipart', data={ 'archive': archive, 'atom_entry': atom_entry, }) self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST) diff --git a/swh/deposit/tests/api/test_deposit_delete.py b/swh/deposit/tests/api/test_deposit_delete.py index a0a10aa1..721d3128 100644 --- a/swh/deposit/tests/api/test_deposit_delete.py +++ b/swh/deposit/tests/api/test_deposit_delete.py @@ -1,119 +1,119 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.urlresolvers import reverse from nose.tools import istest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.config import EDIT_SE_IRI, EM_IRI, ARCHIVE_KEY, METADATA_KEY from swh.deposit.config import DEPOSIT_STATUS_DEPOSITED from swh.deposit.models import Deposit, DepositRequest from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine class DepositDeleteTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): @istest def delete_archive_on_partial_deposit_works(self): """Removing partial deposit's archive should return a 204 response """ # given deposit_id = self.create_deposit_partial() deposit = Deposit.objects.get(pk=deposit_id) deposit_requests = DepositRequest.objects.filter(deposit=deposit) self.assertEquals(len(deposit_requests), 2) for dr in deposit_requests: if dr.type.name == ARCHIVE_KEY: continue elif dr.type.name == METADATA_KEY: continue else: self.fail('only archive and metadata type should exist ' 'in this test context') # when update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) response = self.client.delete(update_uri) # then self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) deposit = Deposit.objects.get(pk=deposit_id) requests = list(DepositRequest.objects.filter(deposit=deposit)) self.assertEquals(len(requests), 2) self.assertEquals(requests[0].type.name, 'metadata') self.assertEquals(requests[1].type.name, 'metadata') @istest def delete_archive_on_undefined_deposit_fails(self): """Delete undefined deposit returns a 404 response """ # when update_uri = reverse(EM_IRI, args=[self.collection.name, 999]) response = self.client.delete(update_uri) # then self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) @istest def delete_archive_on_non_partial_deposit_fails(self): """Delete !partial status deposit should return a 400 response""" deposit_id = self.create_deposit_ready() deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_DEPOSITED) # when update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) response = self.client.delete(update_uri) # then self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) deposit = Deposit.objects.get(pk=deposit_id) self.assertIsNotNone(deposit) @istest def delete_partial_deposit_works(self): """Delete deposit should return a 204 response """ # given deposit_id = self.create_simple_deposit_partial() deposit = Deposit.objects.get(pk=deposit_id) assert deposit.id == deposit_id # when url = reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]) response = self.client.delete(url) # then self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) deposit_requests = list(DepositRequest.objects.filter(deposit=deposit)) self.assertEquals(deposit_requests, []) deposits = list(Deposit.objects.filter(pk=deposit_id)) self.assertEquals(deposits, []) @istest def delete_on_edit_se_iri_cannot_delete_non_partial_deposit(self): """Delete !partial deposit should return a 400 response """ # given deposit_id = self.create_deposit_ready() deposit = Deposit.objects.get(pk=deposit_id) assert deposit.id == deposit_id # when url = reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]) response = self.client.delete(url) # then self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) deposit = Deposit.objects.get(pk=deposit_id) self.assertIsNotNone(deposit) diff --git a/swh/deposit/tests/api/test_deposit_multipart.py b/swh/deposit/tests/api/test_deposit_multipart.py index cee1c2b6..a53b5696 100644 --- a/swh/deposit/tests/api/test_deposit_multipart.py +++ b/swh/deposit/tests/api/test_deposit_multipart.py @@ -1,393 +1,395 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.files.uploadedfile import InMemoryUploadedFile from django.core.urlresolvers import reverse from io import BytesIO from nose.tools import istest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.config import COL_IRI from swh.deposit.config import DEPOSIT_STATUS_DEPOSITED from swh.deposit.models import Deposit, DepositRequest from swh.deposit.parsers import parse_xml from ..common import BasicTestCase, WithAuthTestCase from ..common import FileSystemCreationRoutine class DepositMultipartTestCase(APITestCase, WithAuthTestCase, BasicTestCase, FileSystemCreationRoutine): """Post multipart deposit scenario """ def setUp(self): super().setUp() self.data_atom_entry_ok = b""" Title urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2005-10-07T17:17:08Z Contributor The abstract The abstract Access Rights Alternative Title Date Available Bibliographic Citation # noqa Contributor Description Has Part Has Version Identifier Is Part Of Publisher References Rights Holder Source Title Type """ self.data_atom_entry_update_in_place = """ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa7b Title Type """ @istest def post_deposit_multipart_without_slug_header_is_bad_request(self): # given url = reverse(COL_IRI, args=[self.collection.name]) data_atom_entry = self.data_atom_entry_ok archive_content = b'some content representing archive' archive = InMemoryUploadedFile( BytesIO(archive_content), field_name='archive0', name='archive0', content_type='application/zip', size=len(archive_content), charset=None) atom_entry = InMemoryUploadedFile( BytesIO(data_atom_entry), field_name='atom0', name='atom0', content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry), charset='utf-8') # when response = self.client.post( url, format='multipart', data={ 'archive': archive, 'atom_entry': atom_entry, }, # + headers HTTP_IN_PROGRESS='false') self.assertIn(b'Missing SLUG header', response.content) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) @istest def post_deposit_multipart_zip(self): """one multipart deposit (zip+xml) should be accepted """ # given url = reverse(COL_IRI, args=[self.collection.name]) # from django.core.files import uploadedfile data_atom_entry = self.data_atom_entry_ok archive = InMemoryUploadedFile( BytesIO(self.archive['data']), field_name=self.archive['name'], name=self.archive['name'], content_type='application/zip', size=self.archive['length'], charset=None) atom_entry = InMemoryUploadedFile( BytesIO(data_atom_entry), field_name='atom0', name='atom0', content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry), charset='utf-8') external_id = 'external-id' # when response = self.client.post( url, format='multipart', data={ 'archive': archive, 'atom_entry': atom_entry, }, # + headers HTTP_IN_PROGRESS='false', HTTP_SLUG=external_id) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_requests = DepositRequest.objects.filter(deposit=deposit) self.assertEquals(len(deposit_requests), 2) for deposit_request in deposit_requests: self.assertEquals(deposit_request.deposit, deposit) if deposit_request.type.name == 'archive': self.assertRegex(deposit_request.archive.name, self.archive['name']) else: self.assertEquals( - deposit_request.metadata[ - '{http://www.w3.org/2005/Atom}id'], + deposit_request.metadata['id'], 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a') @istest def post_deposit_multipart_tar(self): """one multipart deposit (tar+xml) should be accepted """ # given url = reverse(COL_IRI, args=[self.collection.name]) # from django.core.files import uploadedfile data_atom_entry = self.data_atom_entry_ok archive = InMemoryUploadedFile( BytesIO(self.archive['data']), field_name=self.archive['name'], name=self.archive['name'], content_type='application/x-tar', size=self.archive['length'], charset=None) atom_entry = InMemoryUploadedFile( BytesIO(data_atom_entry), field_name='atom0', name='atom0', content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry), charset='utf-8') external_id = 'external-id' # when response = self.client.post( url, format='multipart', data={ 'archive': archive, 'atom_entry': atom_entry, }, # + headers HTTP_IN_PROGRESS='false', HTTP_SLUG=external_id) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_requests = DepositRequest.objects.filter(deposit=deposit) self.assertEquals(len(deposit_requests), 2) for deposit_request in deposit_requests: self.assertEquals(deposit_request.deposit, deposit) if deposit_request.type.name == 'archive': self.assertRegex(deposit_request.archive.name, self.archive['name']) else: self.assertEquals( - deposit_request.metadata[ - '{http://www.w3.org/2005/Atom}id'], + deposit_request.metadata['id'], 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a') @istest def post_deposit_multipart_put_to_replace_metadata(self): """One multipart deposit followed by a metadata update should be accepted """ # given url = reverse(COL_IRI, args=[self.collection.name]) data_atom_entry = self.data_atom_entry_ok archive = InMemoryUploadedFile( BytesIO(self.archive['data']), field_name=self.archive['name'], name=self.archive['name'], content_type='application/zip', size=self.archive['length'], charset=None) atom_entry = InMemoryUploadedFile( BytesIO(data_atom_entry), field_name='atom0', name='atom0', content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry), charset='utf-8') external_id = 'external-id' # when response = self.client.post( url, format='multipart', data={ 'archive': archive, 'atom_entry': atom_entry, }, # + headers HTTP_IN_PROGRESS='true', HTTP_SLUG=external_id) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, 'partial') self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_requests = DepositRequest.objects.filter(deposit=deposit) self.assertEquals(len(deposit_requests), 2) for deposit_request in deposit_requests: self.assertEquals(deposit_request.deposit, deposit) if deposit_request.type.name == 'archive': self.assertRegex(deposit_request.archive.name, self.archive['name']) else: self.assertEquals( - deposit_request.metadata[ - '{http://www.w3.org/2005/Atom}id'], + deposit_request.metadata['id'], 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a') replace_metadata_uri = response._headers['location'][1] response = self.client.put( replace_metadata_uri, content_type='application/atom+xml;type=entry', data=self.data_atom_entry_update_in_place, HTTP_IN_PROGRESS='false') self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) # deposit_id did not change deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_requests = DepositRequest.objects.filter(deposit=deposit) self.assertEquals(len(deposit_requests), 2) for deposit_request in deposit_requests: self.assertEquals(deposit_request.deposit, deposit) if deposit_request.type.name == 'archive': self.assertRegex(deposit_request.archive.name, self.archive['name']) else: self.assertEquals( - deposit_request.metadata[ - '{http://www.w3.org/2005/Atom}id'], + deposit_request.metadata['id'], 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa7b') # FAILURE scenarios @istest def post_deposit_multipart_only_archive_and_atom_entry(self): """Multipart deposit only accepts one archive and one atom+xml""" # given url = reverse(COL_IRI, args=[self.collection.name]) archive_content = b'some content representing archive' archive = InMemoryUploadedFile(BytesIO(archive_content), field_name='archive0', name='archive0', content_type='application/x-tar', size=len(archive_content), charset=None) other_archive_content = b"some-other-content" other_archive = InMemoryUploadedFile(BytesIO(other_archive_content), field_name='atom0', name='atom0', content_type='application/x-tar', size=len(other_archive_content), charset='utf-8') # when response = self.client.post( url, format='multipart', data={ 'archive': archive, 'atom_entry': other_archive, }, # + headers HTTP_IN_PROGRESS='false', HTTP_SLUG='external-id') # then self.assertEqual(response.status_code, status.HTTP_415_UNSUPPORTED_MEDIA_TYPE) + self.assertTrue( + 'Only 1 application/zip (or application/x-tar) archive' in + response.content.decode('utf-8')) + # when archive.seek(0) response = self.client.post( url, format='multipart', data={ 'archive': archive, }, # + headers HTTP_IN_PROGRESS='false', HTTP_SLUG='external-id') # then self.assertEqual(response.status_code, status.HTTP_415_UNSUPPORTED_MEDIA_TYPE) + self.assertTrue( + 'You must provide both 1 application/zip (or ' + 'application/x-tar) and 1 atom+xml entry for ' + 'multipart deposit' in response.content.decode('utf-8') + ) diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py index a639d282..2260e789 100644 --- a/swh/deposit/tests/api/test_deposit_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_read_metadata.py @@ -1,211 +1,207 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from django.core.urlresolvers import reverse from nose.tools import istest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.models import Deposit from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS from swh.deposit.config import DEPOSIT_STATUS_PARTIAL from ...config import SWH_PERSON from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine class DepositReadMetadataTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): """Deposit access to read metadata information on deposit. """ @istest def read_metadata(self): """Private metadata read api to existing deposit should return metadata """ deposit_id = self.create_deposit_partial() url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[self.collection.name, deposit_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEquals(response._headers['content-type'][1], 'application/json') data = json.loads(response.content.decode('utf-8')) expected_meta = { 'origin': { 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id', 'type': 'deposit' }, 'origin_metadata': { 'metadata': { - '{http://www.w3.org/2005/Atom}external_identifier': - 'some-external-id', - '{http://www.w3.org/2005/Atom}url': - 'https://hal-test.archives-ouvertes.fr/' + - 'some-external-id' + '@xmlns': 'http://www.w3.org/2005/Atom', + 'external_identifier':'some-external-id', + 'url': 'https://hal-test.archives-ouvertes.fr/' + + 'some-external-id' }, 'provider': { 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'metadata': {} }, 'tool': { 'tool_name': 'swh-deposit', 'tool_version': '0.0.1', 'tool_configuration': { 'sword_version': '2' } } }, 'revision': { 'synthetic': True, 'committer_date': None, 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'author': SWH_PERSON, 'committer': SWH_PERSON, 'date': None, 'metadata': { - '{http://www.w3.org/2005/Atom}external_identifier': - 'some-external-id', - '{http://www.w3.org/2005/Atom}url': - 'https://hal-test.archives-ouvertes.fr/' + - 'some-external-id' + '@xmlns': 'http://www.w3.org/2005/Atom', + 'external_identifier': 'some-external-id', + 'url': 'https://hal-test.archives-ouvertes.fr/' + + 'some-external-id' }, 'type': 'tar' }, 'branch_name': 'master', } self.assertEquals(data, expected_meta) @istest def read_metadata_revision_with_parent(self): """Private read metadata to a deposit (with parent) returns metadata """ swh_id = 'da78a9d4cf1d5d29873693fd496142e3a18c20fa' swh_persistent_id = 'swh:1:rev:%s' % swh_id deposit_id1 = self.create_deposit_with_status( status=DEPOSIT_STATUS_LOAD_SUCCESS, external_id='some-external-id', swh_id=swh_persistent_id) deposit_parent = Deposit.objects.get(pk=deposit_id1) self.assertEquals(deposit_parent.swh_id, swh_persistent_id) self.assertEquals(deposit_parent.external_id, 'some-external-id') self.assertEquals(deposit_parent.status, DEPOSIT_STATUS_LOAD_SUCCESS) deposit_id = self.create_deposit_partial( external_id='some-external-id') deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.external_id, 'some-external-id') self.assertEquals(deposit.swh_id, None) self.assertEquals(deposit.parent, deposit_parent) self.assertEquals(deposit.status, DEPOSIT_STATUS_PARTIAL) url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[self.collection.name, deposit_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEquals(response._headers['content-type'][1], 'application/json') data = json.loads(response.content.decode('utf-8')) expected_meta = { 'origin': { 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id', 'type': 'deposit' }, 'origin_metadata': { 'metadata': { - '{http://www.w3.org/2005/Atom}external_identifier': - 'some-external-id', - '{http://www.w3.org/2005/Atom}url': - 'https://hal-test.archives-ouvertes.fr/' + - 'some-external-id' + '@xmlns': 'http://www.w3.org/2005/Atom', + 'external_identifier': 'some-external-id', + 'url': 'https://hal-test.archives-ouvertes.fr/' + + 'some-external-id' }, 'provider': { 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'metadata': {} }, 'tool': { 'tool_name': 'swh-deposit', 'tool_version': '0.0.1', 'tool_configuration': { 'sword_version': '2' } } }, 'revision': { 'synthetic': True, 'date': None, 'committer_date': None, 'author': SWH_PERSON, 'committer': SWH_PERSON, 'type': 'tar', 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'metadata': { - '{http://www.w3.org/2005/Atom}external_identifier': - 'some-external-id', - '{http://www.w3.org/2005/Atom}url': - 'https://hal-test.archives-ouvertes.fr/' + - 'some-external-id' + '@xmlns': 'http://www.w3.org/2005/Atom', + 'external_identifier': 'some-external-id', + 'url': 'https://hal-test.archives-ouvertes.fr/' + + 'some-external-id' }, 'parents': [swh_id] }, 'branch_name': 'master', } self.assertEquals(data, expected_meta) @istest def access_to_nonexisting_deposit_returns_404_response(self): """Read unknown collection should return a 404 response """ unknown_id = '999' url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[self.collection.name, unknown_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertIn('Deposit with id %s does not exist' % unknown_id, response.content.decode('utf-8')) @istest def access_to_nonexisting_collection_returns_404_response(self): """Read unknown deposit should return a 404 response """ collection_name = 'non-existing' deposit_id = self.create_deposit_partial() url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[collection_name, deposit_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertIn('Unknown collection name %s' % collection_name, response.content.decode('utf-8'),) diff --git a/swh/deposit/tests/api/test_deposit_status.py b/swh/deposit/tests/api/test_deposit_status.py index f3c0692d..71e95db4 100644 --- a/swh/deposit/tests/api/test_deposit_status.py +++ b/swh/deposit/tests/api/test_deposit_status.py @@ -1,121 +1,114 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.urlresolvers import reverse from io import BytesIO from nose.tools import istest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.models import Deposit, DEPOSIT_STATUS_DETAIL from swh.deposit.models import DEPOSIT_STATUS_LOAD_SUCCESS from swh.deposit.parsers import parse_xml from ..common import BasicTestCase, WithAuthTestCase, FileSystemCreationRoutine from ..common import CommonCreationRoutine from ...config import COL_IRI, STATE_IRI, DEPOSIT_STATUS_DEPOSITED class DepositStatusTestCase(APITestCase, WithAuthTestCase, BasicTestCase, FileSystemCreationRoutine, CommonCreationRoutine): """Status on deposit """ @istest def post_deposit_with_status_check(self): """Binary upload should be accepted """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) deposit = Deposit.objects.get(external_id=external_id) status_url = reverse(STATE_IRI, args=[self.collection.name, deposit.id]) # check status status_response = self.client.get(status_url) self.assertEqual(status_response.status_code, status.HTTP_200_OK) r = parse_xml(BytesIO(status_response.content)) - self.assertEqual(r['{http://www.w3.org/2005/Atom}deposit_id'], - deposit.id) - self.assertEqual(r['{http://www.w3.org/2005/Atom}deposit_status'], - DEPOSIT_STATUS_DEPOSITED) - self.assertEqual( - r['{http://www.w3.org/2005/Atom}deposit_status_detail'], - DEPOSIT_STATUS_DETAIL[DEPOSIT_STATUS_DEPOSITED]) + self.assertEqual(int(r['deposit_id']), deposit.id) + self.assertEqual(r['deposit_status'], DEPOSIT_STATUS_DEPOSITED) + self.assertEqual(r['deposit_status_detail'], + DEPOSIT_STATUS_DETAIL[DEPOSIT_STATUS_DEPOSITED]) @istest def status_with_swh_id(self): _status = DEPOSIT_STATUS_LOAD_SUCCESS _swh_id = '548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10' # given deposit_id = self.create_deposit_with_status( status=_status, swh_id=_swh_id) url = reverse(STATE_IRI, args=[self.collection.name, deposit_id]) # when status_response = self.client.get(url) # then self.assertEqual(status_response.status_code, status.HTTP_200_OK) r = parse_xml(BytesIO(status_response.content)) - self.assertEqual(r['{http://www.w3.org/2005/Atom}deposit_id'], - deposit_id) - self.assertEqual(r['{http://www.w3.org/2005/Atom}deposit_status'], - _status) - self.assertEqual( - r['{http://www.w3.org/2005/Atom}deposit_status_detail'], - DEPOSIT_STATUS_DETAIL[DEPOSIT_STATUS_LOAD_SUCCESS]) - self.assertEqual(r['{http://www.w3.org/2005/Atom}deposit_swh_id'], - _swh_id) + self.assertEqual(int(r['deposit_id']), deposit_id) + self.assertEqual(r['deposit_status'], _status) + self.assertEqual(r['deposit_status_detail'], + DEPOSIT_STATUS_DETAIL[DEPOSIT_STATUS_LOAD_SUCCESS]) + self.assertEqual(r['deposit_swh_id'], _swh_id) @istest def status_on_unknown_deposit(self): """Asking for the status of unknown deposit returns 404 response""" status_url = reverse(STATE_IRI, args=[self.collection.name, 999]) status_response = self.client.get(status_url) self.assertEqual(status_response.status_code, status.HTTP_404_NOT_FOUND) @istest def status_with_http_accept_header_should_not_break(self): """Asking deposit status with Accept header should return 200 """ deposit_id = self.create_deposit_partial() status_url = reverse(STATE_IRI, args=[ self.collection.name, deposit_id]) response = self.client.get( status_url, HTTP_ACCEPT='text/html,application/xml;q=9,*/*,q=8') self.assertEqual(response.status_code, status.HTTP_200_OK) diff --git a/swh/deposit/tests/api/test_deposit_update.py b/swh/deposit/tests/api/test_deposit_update.py index de40e70f..3f227ecc 100644 --- a/swh/deposit/tests/api/test_deposit_update.py +++ b/swh/deposit/tests/api/test_deposit_update.py @@ -1,337 +1,345 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.urlresolvers import reverse from nose.tools import istest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.models import Deposit, DepositRequest from swh.deposit.config import EDIT_SE_IRI, EM_IRI from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine, create_arborescence_zip class DepositUpdateOrReplaceExistingDataTest( APITestCase, WithAuthTestCase, BasicTestCase, FileSystemCreationRoutine, CommonCreationRoutine): """Try put/post (update/replace) query on EM_IRI """ def setUp(self): super().setUp() self.atom_entry_data1 = b""" bar """ self.atom_entry_data1 = b""" bar """ self.archive2 = create_arborescence_zip( self.root_path, 'archive2', 'file2', b'some other content in file') @istest def replace_archive_to_deposit_is_possible(self): """Replace all archive with another one should return a 204 response """ # given deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit = Deposit.objects.get(pk=deposit_id) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive']) assert len(list(requests)) == 1 assert self.archive['name'] in requests[0].archive.name # we have no metadata for that deposit requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata'])) assert len(requests) == 0 deposit_id = self._update_deposit_with_status(deposit_id, status_partial=True) requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata'])) assert len(requests) == 1 update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) external_id = 'some-external-id-1' response = self.client.put( update_uri, content_type='application/zip', # as zip data=self.archive2['data'], # + headers CONTENT_LENGTH=self.archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive2['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive2['name'], )) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive']) self.assertEquals(len(list(requests)), 1) self.assertRegex(requests[0].archive.name, self.archive2['name']) # check we did not touch the other parts requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata'])) self.assertEquals(len(requests), 1) @istest def replace_metadata_to_deposit_is_possible(self): """Replace all metadata with another one should return a 204 response """ # given deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit = Deposit.objects.get(pk=deposit_id) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata']) assert len(list(requests)) == 0 requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive'])) assert len(requests) == 1 update_uri = reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]) response = self.client.put( update_uri, content_type='application/atom+xml;type=entry', data=self.atom_entry_data1) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata']) self.assertEquals(len(list(requests)), 1) metadata = requests[0].metadata - self.assertEquals(metadata["{http://www.w3.org/2005/Atom}foobar"], - 'bar') + self.assertEquals(metadata['foobar'], 'bar') # check we did not touch the other parts requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive'])) self.assertEquals(len(requests), 1) @istest def add_archive_to_deposit_is_possible(self): """Add another archive to a deposit return a 201 response """ # given deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit = Deposit.objects.get(pk=deposit_id) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive']) assert len(list(requests)) == 1 assert self.archive['name'] in requests[0].archive.name requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata'])) assert len(requests) == 0 update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) external_id = 'some-external-id-1' response = self.client.post( update_uri, content_type='application/zip', # as zip data=self.archive2['data'], # + headers CONTENT_LENGTH=self.archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive2['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive2['name'],)) self.assertEqual(response.status_code, status.HTTP_201_CREATED) requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive']).order_by('id')) self.assertEquals(len(requests), 2) # first archive still exists self.assertRegex(requests[0].archive.name, self.archive['name']) # a new one was added self.assertRegex(requests[1].archive.name, self.archive2['name']) # check we did not touch the other parts requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata'])) self.assertEquals(len(requests), 0) @istest def add_metadata_to_deposit_is_possible(self): """Add metadata with another one should return a 204 response """ # given deposit_id = self.create_deposit_partial() deposit = Deposit.objects.get(pk=deposit_id) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata']) assert len(list(requests)) == 2 requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive'])) assert len(requests) == 0 update_uri = reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]) response = self.client.post( update_uri, content_type='application/atom+xml;type=entry', data=self.atom_entry_data1) self.assertEqual(response.status_code, status.HTTP_201_CREATED) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata']).order_by('id') self.assertEquals(len(list(requests)), 3) # a new one was added - self.assertEquals(requests[1].metadata[ - "{http://www.w3.org/2005/Atom}foobar"], 'bar') + self.assertEquals(requests[1].metadata['foobar'], 'bar') # check we did not touch the other parts requests = list(DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive'])) self.assertEquals(len(requests), 0) class DepositUpdateFailuresTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): """Failure scenario about add/replace (post/put) query on deposit. """ @istest def add_metadata_to_unknown_collection(self): """Replacing metadata to unknown deposit should return a 404 response """ - url = reverse(EDIT_SE_IRI, - args=['unknown', 999]), + url = reverse(EDIT_SE_IRI, args=['test', 1000]) response = self.client.post( url, content_type='application/atom+xml;type=entry', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertRegex(response.content.decode('utf-8'), + 'Unknown collection name test') @istest def add_metadata_to_unknown_deposit(self): """Replacing metadata to unknown deposit should return a 404 response """ - url = reverse(EDIT_SE_IRI, - args=[self.collection.name, 999]), + url = reverse(EDIT_SE_IRI, args=[self.collection.name, 999]) response = self.client.post( url, content_type='application/atom+xml;type=entry', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertRegex(response.content.decode('utf-8'), + 'Deposit with id 999 does not exist') @istest def replace_metadata_to_unknown_deposit(self): """Adding metadata to unknown deposit should return a 404 response """ - url = reverse(EDIT_SE_IRI, - args=[self.collection.name, 999]), + url = reverse(EDIT_SE_IRI, args=[self.collection.name, 998]) response = self.client.put( url, content_type='application/atom+xml;type=entry', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertRegex(response.content.decode('utf-8'), + 'Deposit with id 998 does not exist') @istest def add_archive_to_unknown_deposit(self): """Adding metadata to unknown deposit should return a 404 response """ - url = reverse(EM_IRI, - args=[self.collection.name, 999]), + url = reverse(EM_IRI, args=[self.collection.name, 997]) response = self.client.post( url, content_type='application/zip', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertRegex(response.content.decode('utf-8'), + 'Deposit with id 997 does not exist') @istest def replace_archive_to_unknown_deposit(self): """Replacing archive to unknown deposit should return a 404 response """ - url = reverse(EM_IRI, - args=[self.collection.name, 999]), + url = reverse(EM_IRI, args=[self.collection.name, 996]) response = self.client.put( url, content_type='application/zip', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + self.assertRegex(response.content.decode('utf-8'), + 'Deposit with id 996 does not exist') @istest def post_metadata_to_em_iri_failure(self): - """Add archive with wrong content type should return a 400 response + """Update (POST) archive with wrong content type should return 400 """ - deposit_id = self.create_deposit_ready() - + deposit_id = self.create_deposit_partial() # only update on partial update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) - response = self.client.put( + response = self.client.post( update_uri, - content_type='application/binary', + content_type='application/x-gtar-compressed', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertRegex(response.content.decode('utf-8'), + 'Packaging format supported is restricted to ' + 'application/zip, application/x-tar') @istest def put_metadata_to_em_iri_failure(self): - """Update archive with wrong content type should return 400 response + """Update (PUT) archive with wrong content type should return 400 """ # given - deposit_id = self.create_deposit_ready() + deposit_id = self.create_deposit_partial() # only update on partial # when update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) response = self.client.put( update_uri, content_type='application/atom+xml;type=entry', data=self.atom_entry_data0) # then self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertRegex(response.content.decode('utf-8'), + 'Packaging format supported is restricted to ' + 'application/zip, application/x-tar') diff --git a/swh/deposit/tests/api/test_parser.py b/swh/deposit/tests/api/test_parser.py new file mode 100644 index 00000000..d874867f --- /dev/null +++ b/swh/deposit/tests/api/test_parser.py @@ -0,0 +1,104 @@ +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import io + +from collections import OrderedDict +from nose.tools import istest +from rest_framework.test import APITestCase + +from swh.deposit.parsers import SWHXMLParser + + +class ParsingTest(APITestCase): + """Access to main entry point is ok without authentication + + """ + @istest + def parsing_without_duplicates(self): + xml_no_duplicate = io.BytesIO(b''' + + Awesome Compiler + + GPL3.0 + https://opensource.org/licenses/GPL-3.0 + + Python3 + + author1 + Inria + + ocaml + http://issuetracker.com + ''') + + actual_result = SWHXMLParser().parse(xml_no_duplicate) + expected_dict = OrderedDict( + [('@xmlns', 'http://www.w3.org/2005/Atom'), + ('@xmlns:codemeta', + 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0'), + ('title', 'Awesome Compiler'), + ('codemeta:license', + OrderedDict([('codemeta:name', 'GPL3.0'), + ('codemeta:url', + 'https://opensource.org/licenses/GPL-3.0')])), + ('codemeta:runtimePlatform', 'Python3'), + ('codemeta:author', + OrderedDict([('codemeta:name', 'author1'), + ('codemeta:affiliation', 'Inria')])), + ('codemeta:programmingLanguage', 'ocaml'), + ('codemeta:issueTracker', 'http://issuetracker.com')]) + self.assertEqual(expected_dict, actual_result) + + @istest + def parsing_with_duplicates(self): + xml_with_duplicates = io.BytesIO(b''' + + Another Compiler + GNU/Linux + + GPL3.0 + https://opensource.org/licenses/GPL-3.0 + + Un*x + + author1 + Inria + + + author2 + Inria + + ocaml + haskell + + spdx + http://spdx.org + + python3 + ''') + + actual_result = SWHXMLParser().parse(xml_with_duplicates) + + expected_dict = OrderedDict([ + ('@xmlns', 'http://www.w3.org/2005/Atom'), + ('@xmlns:codemeta', 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0'), + ('title', 'Another Compiler'), + ('codemeta:runtimePlatform', ['GNU/Linux', 'Un*x']), + ('codemeta:license', + [OrderedDict([('codemeta:name', 'GPL3.0'), + ('codemeta:url', + 'https://opensource.org/licenses/GPL-3.0')]), + OrderedDict([('codemeta:name', 'spdx'), + ('codemeta:url', 'http://spdx.org')])]), + ('codemeta:author', + [OrderedDict([('codemeta:name', 'author1'), + ('codemeta:affiliation', 'Inria')]), + OrderedDict([('codemeta:name', 'author2'), + ('codemeta:affiliation', 'Inria')])]), + ('codemeta:programmingLanguage', ['ocaml', 'haskell', 'python3'])]) + self.assertEqual(expected_dict, actual_result) diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py index d17c836d..c08324e3 100644 --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -1,467 +1,460 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 import hashlib import os import shutil import tempfile from django.core.urlresolvers import reverse from django.test import TestCase from io import BytesIO from nose.plugins.attrib import attr from rest_framework import status from swh.deposit.config import COL_IRI, EM_IRI, EDIT_SE_IRI from swh.deposit.models import DepositClient, DepositCollection, Deposit from swh.deposit.models import DepositRequest from swh.deposit.models import DepositRequestType from swh.deposit.parsers import parse_xml from swh.deposit.settings.testing import MEDIA_ROOT from swh.core import tarball def create_arborescence_zip(root_path, archive_name, filename, content, up_to_size=None): """Build an archive named archive_name in the root_path. This archive contains one file named filename with the content content. Returns: dict with the keys: - dir: the directory of that archive - path: full path to the archive - sha1sum: archive's sha1sum - length: archive's length """ os.makedirs(root_path, exist_ok=True) archive_path_dir = tempfile.mkdtemp(dir=root_path) dir_path = os.path.join(archive_path_dir, archive_name) os.mkdir(dir_path) filepath = os.path.join(dir_path, filename) _length = len(content) count = 0 batch_size = 128 with open(filepath, 'wb') as f: f.write(content) if up_to_size: # fill with blank content up to a given size count += _length while count < up_to_size: f.write(b'0'*batch_size) count += batch_size zip_path = dir_path + '.zip' zip_path = tarball.compress(zip_path, 'zip', dir_path) with open(zip_path, 'rb') as f: length = 0 sha1sum = hashlib.sha1() md5sum = hashlib.md5() data = b'' for chunk in f: sha1sum.update(chunk) md5sum.update(chunk) length += len(chunk) data += chunk return { 'dir': archive_path_dir, 'name': archive_name, 'data': data, 'path': zip_path, 'sha1sum': sha1sum.hexdigest(), 'md5sum': md5sum.hexdigest(), 'length': length, } @attr('fs') class FileSystemCreationRoutine(TestCase): """Mixin intended for tests needed to tamper with archives. """ def setUp(self): """Define the test client and other test variables.""" super().setUp() self.root_path = '/tmp/swh-deposit/test/build-zip/' os.makedirs(self.root_path, exist_ok=True) self.archive = create_arborescence_zip( self.root_path, 'archive1', 'file1', b'some content in file') self.atom_entry = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author https://hal-test.archives-ouvertes.fr """ def tearDown(self): super().tearDown() shutil.rmtree(self.root_path) def create_simple_binary_deposit(self, status_partial=True): response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/zip', data=self.archive['data'], CONTENT_LENGTH=self.archive['length'], HTTP_MD5SUM=self.archive['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive['name'], )) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = int(response_content['deposit_id']) return deposit_id def create_complex_binary_deposit(self, status_partial=False): deposit_id = self.create_simple_binary_deposit( status_partial=True) # Add a second archive to the deposit # update its status to DEPOSIT_STATUS_VERIFIED response = self.client.post( reverse(EM_IRI, args=[self.collection.name, deposit_id]), content_type='application/zip', data=self.archive2['data'], CONTENT_LENGTH=self.archive2['length'], HTTP_MD5SUM=self.archive2['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_DISPOSITION='attachment; filename=filename1.zip') # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = int(response_content['deposit_id']) return deposit_id def update_binary_deposit(self, deposit_id, status_partial=False): # update existing deposit with atom entry metadata response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.codemeta_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) # then # assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = int(response_content['deposit_id']) return deposit_id @attr('fs') class BasicTestCase(TestCase): """Mixin intended for data setup purposes (user, collection, etc...) """ def setUp(self): """Define the test client and other test variables.""" super().setUp() # expanding diffs in tests self.maxDiff = None # basic minimum test data deposit_request_types = {} # Add deposit request types for deposit_request_type in ['archive', 'metadata']: drt = DepositRequestType(name=deposit_request_type) drt.save() deposit_request_types[deposit_request_type] = drt _name = 'hal' _provider_url = 'https://hal-test.archives-ouvertes.fr/' _domain = 'archives-ouvertes.fr/' # set collection up _collection = DepositCollection(name=_name) _collection.save() # set user/client up _client = DepositClient.objects.create_user(username=_name, password=_name, provider_url=_provider_url, domain=_domain) _client.collections = [_collection.id] _client.last_name = _name _client.save() self.collection = _collection self.user = _client self.username = _name self.userpass = _name self.deposit_request_types = deposit_request_types def tearDown(self): super().tearDown() # Clean up uploaded files in temporary directory (tests have # their own media root folder) if os.path.exists(MEDIA_ROOT): for d in os.listdir(MEDIA_ROOT): shutil.rmtree(os.path.join(MEDIA_ROOT, d)) class WithAuthTestCase(TestCase): """Mixin intended for testing the api with basic authentication. """ def setUp(self): super().setUp() _token = '%s:%s' % (self.username, self.userpass) token = base64.b64encode(_token.encode('utf-8')) authorization = 'Basic %s' % token.decode('utf-8') self.client.credentials(HTTP_AUTHORIZATION=authorization) def tearDown(self): super().tearDown() self.client.credentials() class CommonCreationRoutine(TestCase): """Mixin class to share initialization routine. cf: `class`:test_deposit_update.DepositReplaceExistingDataTest `class`:test_deposit_update.DepositUpdateDepositWithNewDataTest `class`:test_deposit_update.DepositUpdateFailuresTest `class`:test_deposit_delete.DepositDeleteTest """ def setUp(self): super().setUp() self.atom_entry_data0 = b""" some-external-id https://hal-test.archives-ouvertes.fr/some-external-id """ self.atom_entry_data1 = b""" anotherthing https://hal-test.archives-ouvertes.fr/anotherthing """ self.atom_entry_data2 = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author https://hal-test.archives-ouvertes.fr/id """ self.codemeta_entry_data0 = b""" Awesome Compiler https://hal-test.archives-ouvertes.fr/1785io25c695 urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author description key-word 1 """ self.codemeta_entry_data1 = b""" Composing a Web of Audio Applications hal hal-01243065 hal-01243065 https://hal-test.archives-ouvertes.fr/hal-01243065 test DSP programming,Web 2017-05-03T16:08:47+02:00 this is the description 1 phpstorm stable php python C GNU General Public License v3.0 only CeCILL Free Software License Agreement v1.1 HAL hal@ccsd.cnrs.fr Morane Gruenpeter """ def create_invalid_deposit(self, external_id='some-external-id-1'): url = reverse(COL_IRI, args=[self.collection.name]) data = b'some data which is clearly not a zip file' md5sum = hashlib.md5(data).hexdigest() # when response = self.client.post( url, content_type='application/zip', # as zip data=data, # + headers CONTENT_LENGTH=len(data), # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, HTTP_CONTENT_MD5=md5sum, HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] - + deposit_id = int(response_content['deposit_id']) return deposit_id def create_deposit_with_status( self, status, external_id='some-external-id-1', swh_id=None): deposit_id = self.create_invalid_deposit(external_id) # We cannot create some form of deposit with a given status in # test context ('rejected' for example). As flipped off the # checks in the configuration so all deposits have the status # deposited). Update in place the deposit with such # status deposit = Deposit.objects.get(pk=deposit_id) deposit.status = status if swh_id: deposit.swh_id = swh_id deposit.save() return deposit_id def create_simple_deposit_partial(self, external_id='some-external-id'): """Create a simple deposit (1 request) in `partial` state and returns its new identifier. Returns: deposit id """ response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data0, HTTP_SLUG=external_id, HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = int(response_content['deposit_id']) return deposit_id def create_deposit_partial_with_data_in_args(self, data): """Create a simple deposit (1 request) in `partial` state with the data or metadata as an argument and returns its new identifier. Args: data: atom entry Returns: deposit id """ response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=data, HTTP_SLUG='external-id', HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content[ - '{http://www.w3.org/2005/Atom}deposit_id'] + deposit_id = int(response_content['deposit_id']) return deposit_id def _update_deposit_with_status(self, deposit_id, status_partial=False): """Add to a given deposit another archive and update its current status to `deposited` (by default). Returns: deposit id """ # when response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) # then assert response.status_code == status.HTTP_201_CREATED return deposit_id def create_deposit_ready(self, external_id='some-external-id'): """Create a complex deposit (2 requests) in status `deposited`. """ deposit_id = self.create_simple_deposit_partial( external_id=external_id) deposit_id = self._update_deposit_with_status(deposit_id) return deposit_id def create_deposit_partial(self, external_id='some-external-id'): """Create a complex deposit (2 requests) in status `partial`. """ deposit_id = self.create_simple_deposit_partial( external_id=external_id) deposit_id = self._update_deposit_with_status( deposit_id, status_partial=True) return deposit_id def add_metadata_to_deposit(self, deposit_id, status_partial=False): """Add metadata to deposit. """ # when response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.codemeta_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) assert response.status_code == status.HTTP_201_CREATED # then deposit = Deposit.objects.get(pk=deposit_id) assert deposit is not None deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert deposit_requests is not [] for dr in deposit_requests: if dr.type.name == 'metadata': assert deposit_requests[0].metadata is not {} return deposit_id diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py index 66c04fec..221329a3 100644 --- a/swh/deposit/tests/loader/test_loader.py +++ b/swh/deposit/tests/loader/test_loader.py @@ -1,289 +1,294 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import unittest import shutil from nose.tools import istest from nose.plugins.attrib import attr from rest_framework.test import APITestCase from swh.model import hashutil from swh.deposit.loader import loader from swh.deposit.config import PRIVATE_GET_RAW_CONTENT from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA from swh.deposit.config import PRIVATE_PUT_DEPOSIT from django.core.urlresolvers import reverse from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG from .. import TEST_LOADER_CONFIG from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine TOOL_ID = 99 PROVIDER_ID = 12 class DepositLoaderInhibitsStorage: """Mixin class to inhibit the persistence and keep in memory the data sent for storage. cf. SWHDepositLoaderNoStorage """ def __init__(self, client=None): # client is not used here, transit it nonetheless to other mixins super().__init__(client=client) # typed data self.state = { 'origin': [], 'origin_visit': [], 'origin_metadata': [], 'content': [], 'directory': [], 'revision': [], 'release': [], 'snapshot': [], 'tool': [], 'provider': [] } def _add(self, type, l): """Add without duplicates and keeping the insertion order. Args: type (str): Type of objects concerned by the action l ([object]): List of 'type' object """ col = self.state[type] for o in l: if o in col: continue col.extend([o]) def send_origin(self, origin): origin.update({'id': 1}) self._add('origin', [origin]) return origin['id'] def send_origin_visit(self, origin_id, visit_date): origin_visit = { 'origin': origin_id, 'visit_date': visit_date, 'visit': 1, } self._add('origin_visit', [origin_visit]) return origin_visit def send_origin_metadata(self, origin_id, visit_date, provider_id, tool_id, metadata): origin_metadata = { 'origin_id': origin_id, 'visit_date': visit_date, 'provider_id': provider_id, 'tool_id': tool_id, 'metadata': metadata } self._add('origin_metadata', [origin_metadata]) return origin_metadata def send_tool(self, tool): tool = { 'tool_name': tool['tool_name'], 'tool_version': tool['tool_version'], 'tool_configuration': tool['tool_configuration'] } self._add('tool', [tool]) tool_id = TOOL_ID return tool_id def send_provider(self, provider): provider = { 'provider_name': provider['provider_name'], 'provider_type': provider['provider_type'], 'provider_url': provider['provider_url'], 'metadata': provider['metadata'] } self._add('provider', [provider]) provider_id = PROVIDER_ID return provider_id def maybe_load_contents(self, contents): self._add('content', contents) def maybe_load_directories(self, directories): self._add('directory', directories) def maybe_load_revisions(self, revisions): self._add('revision', revisions) def maybe_load_releases(self, releases): self._add('release', releases) def maybe_load_snapshot(self, snapshot): self._add('snapshot', [snapshot]) def open_fetch_history(self): pass def close_fetch_history_failure(self, fetch_history_id): pass def close_fetch_history_success(self, fetch_history_id): pass def update_origin_visit(self, origin_id, visit, status): self.status = status # Override to do nothing at the end def close_failure(self): pass def close_success(self): pass class TestLoaderUtils(unittest.TestCase): def assertRevisionsOk(self, expected_revisions): # noqa: N802 """Check the loader's revisions match the expected revisions. Expects self.loader to be instantiated and ready to be inspected (meaning the loading took place). Args: expected_revisions (dict): Dict with key revision id, value the targeted directory id. """ # The last revision being the one used later to start back from for rev in self.loader.state['revision']: rev_id = hashutil.hash_to_hex(rev['id']) directory_id = hashutil.hash_to_hex(rev['directory']) self.assertEquals(expected_revisions[rev_id], directory_id) class SWHDepositLoaderNoStorage(DepositLoaderInhibitsStorage, loader.DepositLoader): """Loader to test. It inherits from the actual deposit loader to actually test its correct behavior. It also inherits from DepositLoaderInhibitsStorage so that no persistence takes place. """ pass @attr('fs') class DepositLoaderScenarioTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine, TestLoaderUtils): def setUp(self): super().setUp() # create the extraction dir used by the loader os.makedirs(TEST_LOADER_CONFIG['extraction_dir'], exist_ok=True) # 1. create a deposit with archive and metadata self.deposit_id = self.create_simple_binary_deposit() # 2. Sets a basic client which accesses the test data loader_client = SWHDepositTestClient(self.client, config=CLIENT_TEST_CONFIG) # 3. setup loader with no persistence and that client self.loader = SWHDepositLoaderNoStorage(client=loader_client) def tearDown(self): super().tearDown() shutil.rmtree(TEST_LOADER_CONFIG['extraction_dir']) @istest def inject_deposit_ready(self): """Load a deposit which is ready """ args = [self.collection.name, self.deposit_id] archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) # when self.loader.load(archive_url=archive_url, deposit_meta_url=deposit_meta_url, deposit_update_url=deposit_update_url) # then self.assertEquals(len(self.loader.state['content']), 1) self.assertEquals(len(self.loader.state['directory']), 1) self.assertEquals(len(self.loader.state['revision']), 1) self.assertEquals(len(self.loader.state['release']), 0) self.assertEquals(len(self.loader.state['snapshot']), 1) @istest def inject_deposit_verify_metadata(self): """Load a deposit with metadata, test metadata integrity """ self.deposit_metadata_id = self.add_metadata_to_deposit( - self.deposit_id) + self.deposit_id) args = [self.collection.name, self.deposit_metadata_id] archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) # when self.loader.load(archive_url=archive_url, deposit_meta_url=deposit_meta_url, deposit_update_url=deposit_update_url) # then self.assertEquals(len(self.loader.state['content']), 1) self.assertEquals(len(self.loader.state['directory']), 1) self.assertEquals(len(self.loader.state['revision']), 1) self.assertEquals(len(self.loader.state['release']), 0) self.assertEquals(len(self.loader.state['snapshot']), 1) self.assertEquals(len(self.loader.state['origin_metadata']), 1) self.assertEquals(len(self.loader.state['tool']), 1) self.assertEquals(len(self.loader.state['provider']), 1) - atom = '{http://www.w3.org/2005/Atom}' - codemeta = '{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}' + codemeta = 'codemeta:' expected_origin_metadata = { - atom + 'author': { - atom + 'email': 'hal@ccsd.cnrs.fr', - atom + 'name': 'HAL' + '@xmlns': 'http://www.w3.org/2005/Atom', + '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', + 'author': { + 'email': 'hal@ccsd.cnrs.fr', + 'name': 'HAL' }, codemeta + 'url': - 'https://hal-test.archives-ouvertes.fr/hal-01243065', + 'https://hal-test.archives-ouvertes.fr/hal-01243065', codemeta + 'runtimePlatform': 'phpstorm', - codemeta + 'license': { - codemeta + 'name': - 'CeCILL Free Software License Agreement v1.1' - }, + codemeta + 'license': [ + { + codemeta + 'name': 'GNU General Public License v3.0 only' + }, + { + codemeta + 'name': 'CeCILL Free Software License Agreement v1.1' # noqa + } + ], codemeta + 'author': { codemeta + 'name': 'Morane Gruenpeter' }, - codemeta + 'programmingLanguage': 'C', + codemeta + 'programmingLanguage': ['php', 'python', 'C'], codemeta + 'applicationCategory': 'test', codemeta + 'dateCreated': '2017-05-03T16:08:47+02:00', - codemeta + 'version': 1, - atom + 'external_identifier': 'hal-01243065', - atom + 'title': 'Composing a Web of Audio Applications', + codemeta + 'version': '1', + 'external_identifier': 'hal-01243065', + 'title': 'Composing a Web of Audio Applications', codemeta + 'description': 'this is the description', - atom + 'id': 'hal-01243065', - atom + 'client': 'hal', + 'id': 'hal-01243065', + 'client': 'hal', codemeta + 'keywords': 'DSP programming,Web', codemeta + 'developmentStatus': 'stable' } result = self.loader.state['origin_metadata'][0] self.assertEquals(result['metadata'], expected_origin_metadata) self.assertEquals(result['tool_id'], TOOL_ID) self.assertEquals(result['provider_id'], PROVIDER_ID) diff --git a/version.txt b/version.txt index 667e1dda..8e024313 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.52-0-g7e535ab \ No newline at end of file +v0.0.53-0-gaeaab02 \ No newline at end of file