diff --git a/MANIFEST.in b/MANIFEST.in index 92319741..3f5831b7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,10 @@ include Makefile include requirements.txt +include requirements-test.txt include requirements-swh.txt +include requirements-server.txt +include requirements-swh-server.txt include version.txt recursive-include swh/deposit/static * recursive-include swh/deposit/fixtures * recursive-include swh/deposit/templates * diff --git a/PKG-INFO b/PKG-INFO index 4e229c89..c286f2ca 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,34 +1,35 @@ Metadata-Version: 2.1 Name: swh.deposit -Version: 0.0.67 +Version: 0.0.68 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Description: # swh-deposit This is [Software Heritage](https://www.softwareheritage.org)'s [SWORD 2.0](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) Server implementation. **S.W.O.R.D** (**S**imple **W**eb-Service **O**ffering **R**epository **D**eposit) is an interoperability standard for digital file deposit. This implementation will permit interaction between a client (a repository) and a server (SWH repository) to permit deposits of software source code archives and associated metadata. The documentation is at ./docs/README-specification.md Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown +Provides-Extra: server Provides-Extra: testing diff --git a/bin/swh-deposit b/bin/swh-deposit deleted file mode 100755 index 89366915..00000000 --- a/bin/swh-deposit +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -# Use: ./swh-deposit --help -# -# Documentation: https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html - -python3 -m swh.deposit.client.cli $@ diff --git a/requirements.txt b/requirements-server.txt similarity index 54% copy from requirements.txt copy to requirements-server.txt index 8bf3155c..0fd17f45 100644 --- a/requirements.txt +++ b/requirements-server.txt @@ -1,5 +1,2 @@ -vcversioner -click Django < 2.0 djangorestframework -xmltodict diff --git a/requirements-swh.txt b/requirements-swh-server.txt similarity index 83% copy from requirements-swh.txt copy to requirements-swh-server.txt index ae6f63e2..1b77634b 100644 --- a/requirements-swh.txt +++ b/requirements-swh-server.txt @@ -1,5 +1,4 @@ -swh.core >= 0.0.36 swh.loader.tar >= 0.0.39 swh.loader.core >= 0.0.32 swh.scheduler >= 0.0.39 swh.model >= 0.0.26 diff --git a/requirements-swh.txt b/requirements-swh.txt index ae6f63e2..5662ee37 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,5 +1 @@ swh.core >= 0.0.36 -swh.loader.tar >= 0.0.39 -swh.loader.core >= 0.0.32 -swh.scheduler >= 0.0.39 -swh.model >= 0.0.26 diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 00000000..893614ff --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,3 @@ +pytest<4 +pytest-django +swh.scheduler[testing] diff --git a/requirements.txt b/requirements.txt index 8bf3155c..68bb2e26 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ vcversioner click -Django < 2.0 -djangorestframework xmltodict +iso8601 diff --git a/setup.py b/setup.py index 52090e35..2b1163be 100755 --- a/setup.py +++ b/setup.py @@ -1,66 +1,71 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from setuptools import setup, find_packages from os import path from io import open here = path.abspath(path.dirname(__file__)) # Get the long description from the README file with open(path.join(here, 'README.md'), encoding='utf-8') as f: long_description = f.read() def parse_requirements(name=None): if name: reqf = 'requirements-%s.txt' % name else: reqf = 'requirements.txt' requirements = [] if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith('#'): continue requirements.append(line) return requirements setup( name='swh.deposit', description='Software Heritage Deposit Server', long_description=long_description, long_description_content_type='text/markdown', author='Software Heritage developers', author_email='swh-devel@inria.fr', url='https://forge.softwareheritage.org/source/swh-deposit/', packages=find_packages(), - scripts=['bin/swh-deposit'], # scripts to package install_requires=parse_requirements() + parse_requirements('swh'), tests_require=parse_requirements('test'), setup_requires=['vcversioner'], - extras_require={'testing': parse_requirements('test')}, + extras_require={'testing': parse_requirements('test'), + 'server': (parse_requirements('server') + + parse_requirements('swh-server'))}, vcversioner={}, include_package_data=True, + entry_points=''' + [console_scripts] + swh-deposit=swh.deposit.cli:main + ''', classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 5 - Production/Stable", ], project_urls={ 'Bug Reports': 'https://forge.softwareheritage.org/maniphest', 'Funding': 'https://www.softwareheritage.org/donate', 'Source': 'https://forge.softwareheritage.org/source/swh-deposit', }, ) diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO index 4e229c89..c286f2ca 100644 --- a/swh.deposit.egg-info/PKG-INFO +++ b/swh.deposit.egg-info/PKG-INFO @@ -1,34 +1,35 @@ Metadata-Version: 2.1 Name: swh.deposit -Version: 0.0.67 +Version: 0.0.68 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Description: # swh-deposit This is [Software Heritage](https://www.softwareheritage.org)'s [SWORD 2.0](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) Server implementation. **S.W.O.R.D** (**S**imple **W**eb-Service **O**ffering **R**epository **D**eposit) is an interoperability standard for digital file deposit. This implementation will permit interaction between a client (a repository) and a server (SWH repository) to permit deposits of software source code archives and associated metadata. The documentation is at ./docs/README-specification.md Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown +Provides-Extra: server Provides-Extra: testing diff --git a/swh.deposit.egg-info/SOURCES.txt b/swh.deposit.egg-info/SOURCES.txt index 8e1e6b79..f69f6e16 100644 --- a/swh.deposit.egg-info/SOURCES.txt +++ b/swh.deposit.egg-info/SOURCES.txt @@ -1,117 +1,122 @@ MANIFEST.in Makefile README.md +requirements-server.txt +requirements-swh-server.txt requirements-swh.txt +requirements-test.txt requirements.txt setup.py version.txt -bin/swh-deposit swh/__init__.py -swh/manage.py swh.deposit.egg-info/PKG-INFO swh.deposit.egg-info/SOURCES.txt swh.deposit.egg-info/dependency_links.txt +swh.deposit.egg-info/entry_points.txt swh.deposit.egg-info/requires.txt swh.deposit.egg-info/top_level.txt swh/deposit/__init__.py swh/deposit/apps.py swh/deposit/auth.py swh/deposit/config.py -swh/deposit/create_user.py swh/deposit/errors.py +swh/deposit/manage.py swh/deposit/models.py swh/deposit/parsers.py swh/deposit/signals.py swh/deposit/urls.py swh/deposit/utils.py swh/deposit/wsgi.py swh/deposit/api/__init__.py swh/deposit/api/common.py swh/deposit/api/converters.py swh/deposit/api/deposit.py swh/deposit/api/deposit_content.py swh/deposit/api/deposit_status.py swh/deposit/api/deposit_update.py swh/deposit/api/service_document.py swh/deposit/api/urls.py swh/deposit/api/private/__init__.py swh/deposit/api/private/deposit_check.py swh/deposit/api/private/deposit_list.py swh/deposit/api/private/deposit_read.py swh/deposit/api/private/deposit_update_status.py swh/deposit/api/private/urls.py +swh/deposit/cli/__init__.py +swh/deposit/cli/admin.py +swh/deposit/cli/deposit.py swh/deposit/client/__init__.py -swh/deposit/client/cli.py swh/deposit/fixtures/__init__.py swh/deposit/fixtures/deposit_data.yaml swh/deposit/loader/__init__.py swh/deposit/loader/checker.py swh/deposit/loader/loader.py swh/deposit/loader/tasks.py swh/deposit/migrations/0001_initial.py swh/deposit/migrations/0002_depositrequest_archive.py swh/deposit/migrations/0003_temporaryarchive.py swh/deposit/migrations/0004_delete_temporaryarchive.py swh/deposit/migrations/0005_auto_20171019_1436.py swh/deposit/migrations/0006_depositclient_url.py swh/deposit/migrations/0007_auto_20171129_1609.py swh/deposit/migrations/0008_auto_20171130_1513.py swh/deposit/migrations/0009_deposit_parent.py swh/deposit/migrations/0010_auto_20180110_0953.py swh/deposit/migrations/0011_auto_20180115_1510.py swh/deposit/migrations/0012_deposit_status_detail.py swh/deposit/migrations/0013_depositrequest_raw_metadata.py swh/deposit/migrations/0014_auto_20180720_1221.py +swh/deposit/migrations/0015_depositrequest_typemigration.py swh/deposit/migrations/__init__.py swh/deposit/settings/__init__.py swh/deposit/settings/common.py swh/deposit/settings/development.py swh/deposit/settings/production.py swh/deposit/settings/testing.py swh/deposit/static/robots.txt swh/deposit/static/css/bootstrap-responsive.min.css swh/deposit/static/css/style.css swh/deposit/static/img/arrow-up-small.png swh/deposit/static/img/swh-logo-deposit.png swh/deposit/static/img/swh-logo-deposit.svg swh/deposit/static/img/icons/swh-logo-32x32.png swh/deposit/static/img/icons/swh-logo-deposit-180x180.png swh/deposit/static/img/icons/swh-logo-deposit-192x192.png swh/deposit/static/img/icons/swh-logo-deposit-270x270.png swh/deposit/templates/__init__.py swh/deposit/templates/homepage.html swh/deposit/templates/layout.html swh/deposit/templates/deposit/__init__.py swh/deposit/templates/deposit/content.xml swh/deposit/templates/deposit/deposit_receipt.xml swh/deposit/templates/deposit/error.xml swh/deposit/templates/deposit/service_document.xml swh/deposit/templates/deposit/status.xml swh/deposit/templates/rest_framework/api.html swh/deposit/tests/__init__.py swh/deposit/tests/common.py swh/deposit/tests/test_utils.py swh/deposit/tests/api/__init__.py swh/deposit/tests/api/test_common.py swh/deposit/tests/api/test_converters.py swh/deposit/tests/api/test_deposit.py swh/deposit/tests/api/test_deposit_atom.py swh/deposit/tests/api/test_deposit_binary.py swh/deposit/tests/api/test_deposit_check.py swh/deposit/tests/api/test_deposit_delete.py swh/deposit/tests/api/test_deposit_list.py swh/deposit/tests/api/test_deposit_multipart.py swh/deposit/tests/api/test_deposit_read_archive.py swh/deposit/tests/api/test_deposit_read_metadata.py swh/deposit/tests/api/test_deposit_status.py swh/deposit/tests/api/test_deposit_update.py swh/deposit/tests/api/test_deposit_update_status.py swh/deposit/tests/api/test_parser.py swh/deposit/tests/api/test_service_document.py swh/deposit/tests/loader/__init__.py swh/deposit/tests/loader/common.py swh/deposit/tests/loader/conftest.py swh/deposit/tests/loader/test_checker.py swh/deposit/tests/loader/test_client.py swh/deposit/tests/loader/test_loader.py swh/deposit/tests/loader/test_tasks.py \ No newline at end of file diff --git a/swh.deposit.egg-info/entry_points.txt b/swh.deposit.egg-info/entry_points.txt new file mode 100644 index 00000000..e0dcc7a1 --- /dev/null +++ b/swh.deposit.egg-info/entry_points.txt @@ -0,0 +1,4 @@ + + [console_scripts] + swh-deposit=swh.deposit.cli:main + \ No newline at end of file diff --git a/swh.deposit.egg-info/requires.txt b/swh.deposit.egg-info/requires.txt index 748cff14..dc10ec30 100644 --- a/swh.deposit.egg-info/requires.txt +++ b/swh.deposit.egg-info/requires.txt @@ -1,15 +1,18 @@ vcversioner click -Django<2.0 -djangorestframework xmltodict +iso8601 swh.core>=0.0.36 + +[server] +Django<2.0 +djangorestframework swh.loader.tar>=0.0.39 swh.loader.core>=0.0.32 swh.scheduler>=0.0.39 swh.model>=0.0.26 [testing] pytest<4 pytest-django swh.scheduler[testing] diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py index 2694fd61..cb972894 100644 --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,883 +1,893 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib from abc import ABCMeta, abstractmethod -from django.core.urlresolvers import reverse +from django.urls import reverse from django.http import HttpResponse from django.shortcuts import render from django.utils import timezone from rest_framework import status from rest_framework.authentication import BasicAuthentication from rest_framework.permissions import IsAuthenticated, AllowAny from rest_framework.views import APIView from swh.model import hashutil from ..config import ( SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI, ARCHIVE_KEY, METADATA_KEY, RAW_METADATA_KEY, STATE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, - DEPOSIT_STATUS_LOAD_SUCCESS + DEPOSIT_STATUS_LOAD_SUCCESS, ARCHIVE_TYPE, METADATA_TYPE ) from ..errors import ( MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT, CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED, make_error_response_from_dict, FORBIDDEN, - NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED + NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED, + ParserError, PARSING_ERROR ) from ..models import ( - Deposit, DepositRequest, DepositCollection, DepositRequestType, + Deposit, DepositRequest, DepositCollection, DepositClient ) from ..parsers import parse_xml ACCEPT_PACKAGINGS = ['http://purl.org/net/sword/package/SimpleZip'] ACCEPT_ARCHIVE_CONTENT_TYPES = ['application/zip', 'application/x-tar'] class SWHAPIView(APIView): """Mixin intended as a based API view to enforce the basic authentication check """ authentication_classes = (BasicAuthentication, ) permission_classes = (IsAuthenticated, ) class SWHPrivateAPIView(SWHAPIView): """Mixin intended as private api (so no authentication) based API view (for the private ones). """ authentication_classes = () permission_classes = (AllowAny, ) class SWHBaseDeposit(SWHDefaultConfig, SWHAPIView, metaclass=ABCMeta): """Base deposit request class sharing multiple common behaviors. """ - def __init__(self): - super().__init__() - deposit_request_types = DepositRequestType.objects.all() - self.deposit_request_types = { - type.name: type for type in deposit_request_types - } def _read_headers(self, req): """Read and unify the necessary headers from the request (those are not stored in the same location or not properly formatted). Args: req (Request): Input request Returns: Dictionary with the following keys (some associated values may be None): - content-type - content-length - in-progress - content-disposition - packaging - slug - on-behalf-of """ meta = req._request.META content_type = req.content_type content_length = meta.get('CONTENT_LENGTH') if content_length and isinstance(content_length, str): content_length = int(content_length) # final deposit if not provided in_progress = meta.get('HTTP_IN_PROGRESS', False) content_disposition = meta.get('HTTP_CONTENT_DISPOSITION') if isinstance(in_progress, str): in_progress = in_progress.lower() == 'true' content_md5sum = meta.get('HTTP_CONTENT_MD5') if content_md5sum: content_md5sum = bytes.fromhex(content_md5sum) packaging = meta.get('HTTP_PACKAGING') slug = meta.get('HTTP_SLUG') on_behalf_of = meta.get('HTTP_ON_BEHALF_OF') metadata_relevant = meta.get('HTTP_METADATA_RELEVANT') return { 'content-type': content_type, 'content-length': content_length, 'in-progress': in_progress, 'content-disposition': content_disposition, 'content-md5sum': content_md5sum, 'packaging': packaging, 'slug': slug, 'on-behalf-of': on_behalf_of, 'metadata-relevant': metadata_relevant, } def _compute_md5(self, filehandler): """Compute uploaded file's md5 sum. Args: filehandler (InMemoryUploadedFile): the file to compute the md5 hash Returns: the md5 checksum (str) """ h = hashlib.md5() for chunk in filehandler: h.update(chunk) return h.digest() def _deposit_put(self, deposit_id=None, in_progress=False, external_id=None): """Save/Update a deposit in db. Args: deposit_id (int): deposit identifier in_progress (dict): The deposit's status external_id (str): The external identifier to associate to the deposit Returns: The Deposit instance saved or updated. """ if in_progress is False: complete_date = timezone.now() status_type = DEPOSIT_STATUS_DEPOSITED else: complete_date = None status_type = DEPOSIT_STATUS_PARTIAL if not deposit_id: try: # find a deposit parent (same external id, status load # to success) deposit_parent = Deposit.objects.filter( external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS).order_by('-id')[0:1].get() # noqa except Deposit.DoesNotExist: deposit_parent = None deposit = Deposit(collection=self._collection, external_id=external_id, complete_date=complete_date, status=status_type, client=self._client, parent=deposit_parent) else: deposit = Deposit.objects.get(pk=deposit_id) # update metadata deposit.complete_date = complete_date deposit.status = status_type deposit.save() return deposit def _deposit_request_put(self, deposit, deposit_request_data, replace_metadata=False, replace_archives=False): """Save a deposit request with metadata attached to a deposit. Args: deposit (Deposit): The deposit concerned by the request deposit_request_data (dict): The dictionary with at most 2 deposit request types (archive, metadata) to associate to the deposit replace_metadata (bool): Flag defining if we add or update existing metadata to the deposit replace_archives (bool): Flag defining if we add or update archives to existing deposit Returns: None """ if replace_metadata: DepositRequest.objects.filter( deposit=deposit, - type=self.deposit_request_types[METADATA_KEY]).delete() + type=METADATA_TYPE).delete() if replace_archives: DepositRequest.objects.filter( deposit=deposit, - type=self.deposit_request_types[ARCHIVE_KEY]).delete() + type=ARCHIVE_TYPE).delete() deposit_request = None archive_file = deposit_request_data.get(ARCHIVE_KEY) if archive_file: deposit_request = DepositRequest( - type=self.deposit_request_types[ARCHIVE_KEY], + type=ARCHIVE_TYPE, deposit=deposit, archive=archive_file) deposit_request.save() metadata = deposit_request_data.get(METADATA_KEY) if metadata: raw_metadata = deposit_request_data.get(RAW_METADATA_KEY) deposit_request = DepositRequest( - type=self.deposit_request_types[METADATA_KEY], + type=METADATA_TYPE, deposit=deposit, metadata=metadata, raw_metadata=raw_metadata) deposit_request.save() assert deposit_request is not None def _delete_archives(self, collection_name, deposit_id): """Delete archives reference from the deposit id. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'The deposit %s does not exist' % deposit_id) DepositRequest.objects.filter( deposit=deposit, - type=self.deposit_request_types[ARCHIVE_KEY]).delete() + type=ARCHIVE_TYPE).delete() return {} def _delete_deposit(self, collection_name, deposit_id): """Delete deposit reference. Args: collection_name (str): Client's name deposit_id (id): The deposit to delete Returns Empty dict when ok. Dict with error key to describe the failure. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'The deposit %s does not exist' % deposit_id) if deposit.collection.name != collection_name: summary = 'Cannot delete a deposit from another collection' description = "Deposit %s does not belong to the collection %s" % ( deposit_id, collection_name) return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description) DepositRequest.objects.filter(deposit=deposit).delete() deposit.delete() return {} def _check_preconditions_on(self, filehandler, md5sum, content_length=None): """Check preconditions on provided file are respected. That is the length and/or the md5sum hash match the file's content. Args: filehandler (InMemoryUploadedFile): The file to check md5sum (hex str): md5 hash expected from the file's content content_length (int): the expected length if provided. Returns: Either none if no error or a dictionary with a key error detailing the problem. """ if content_length: if content_length > self.config['max_upload_size']: return make_error_dict( MAX_UPLOAD_SIZE_EXCEEDED, 'Upload size limit exceeded (max %s bytes).' % self.config['max_upload_size'], 'Please consider sending the archive in ' 'multiple steps.') length = filehandler.size if length != content_length: return make_error_dict(status.HTTP_412_PRECONDITION_FAILED, 'Wrong length') if md5sum: _md5sum = self._compute_md5(filehandler) if _md5sum != md5sum: return make_error_dict( CHECKSUM_MISMATCH, 'Wrong md5 hash', 'The checksum sent %s and the actual checksum ' '%s does not match.' % (hashutil.hash_to_hex(md5sum), hashutil.hash_to_hex(_md5sum))) return None def _binary_upload(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Binary upload routine. Other than such a request, a 415 response is returned. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 413 (request entity too large) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided """ content_length = headers['content-length'] if not content_length: return make_error_dict( BAD_REQUEST, 'CONTENT_LENGTH header is mandatory', 'For archive deposit, the ' 'CONTENT_LENGTH header must be sent.') content_disposition = headers['content-disposition'] if not content_disposition: return make_error_dict( BAD_REQUEST, 'CONTENT_DISPOSITION header is mandatory', 'For archive deposit, the ' 'CONTENT_DISPOSITION header must be sent.') packaging = headers['packaging'] if packaging and packaging not in ACCEPT_PACKAGINGS: return make_error_dict( BAD_REQUEST, 'Only packaging %s is supported' % ACCEPT_PACKAGINGS, 'The packaging provided %s is not supported' % packaging) filehandler = req.FILES['file'] precondition_status_response = self._check_preconditions_on( filehandler, headers['content-md5sum'], content_length) if precondition_status_response: return precondition_status_response external_id = headers['slug'] # actual storage of data archive_metadata = filehandler deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) self._deposit_request_put( deposit, {ARCHIVE_KEY: archive_metadata}, replace_metadata=replace_metadata, replace_archives=replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'status': deposit.status, 'archive': filehandler.name, } def _read_metadata(self, metadata_stream): """Given a metadata stream, reads the metadata and returns both the parsed and the raw metadata. """ raw_metadata = metadata_stream.read() metadata = parse_xml(raw_metadata) return raw_metadata, metadata def _multipart_upload(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Multipart upload supported with exactly: - 1 archive (zip) - 1 atom entry Other than such a request, a 415 response is returned. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 413 (request entity too large) if the length of the archive exceeds the max size configured - 415 (unsupported media type) if a wrong media type is provided """ external_id = headers['slug'] content_types_present = set() data = { 'application/zip': None, # expected either zip 'application/x-tar': None, # or x-tar 'application/atom+xml': None, } for key, value in req.FILES.items(): fh = value if fh.content_type in content_types_present: return make_error_dict( ERROR_CONTENT, 'Only 1 application/zip (or application/x-tar) archive ' 'and 1 atom+xml entry is supported (as per sword2.0 ' 'specification)', 'You provided more than 1 application/(zip|x-tar) ' 'or more than 1 application/atom+xml content-disposition ' 'header in the multipart deposit') content_types_present.add(fh.content_type) data[fh.content_type] = fh if len(content_types_present) != 2: return make_error_dict( ERROR_CONTENT, 'You must provide both 1 application/zip (or ' 'application/x-tar) and 1 atom+xml entry for multipart ' 'deposit', 'You need to provide only 1 application/(zip|x-tar) ' 'and 1 application/atom+xml content-disposition header ' 'in the multipart deposit') filehandler = data['application/zip'] if not filehandler: filehandler = data['application/x-tar'] precondition_status_response = self._check_preconditions_on( filehandler, headers['content-md5sum']) if precondition_status_response: return precondition_status_response - raw_metadata, metadata = self._read_metadata( - data['application/atom+xml']) + try: + raw_metadata, metadata = self._read_metadata( + data['application/atom+xml']) + except ParserError: + return make_error_dict( + PARSING_ERROR, + 'Malformed xml metadata', + "The xml received is malformed. " + "Please ensure your metadata file is correctly formatted.") # actual storage of data deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) deposit_request_data = { ARCHIVE_KEY: filehandler, METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } self._deposit_request_put( deposit, deposit_request_data, replace_metadata, replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'archive': filehandler.name, 'status': deposit.status, } def _atom_entry(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): """Atom entry deposit. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id: deposit id associated to the deposit - deposit_date: date of the deposit - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ - raw_metadata, metadata = self._read_metadata(req.data) + try: + raw_metadata, metadata = self._read_metadata(req.data) + except ParserError: + return make_error_dict( + BAD_REQUEST, + 'Malformed xml metadata', + "The xml received is malformed. " + "Please ensure your metadata file is correctly formatted.") + if not metadata: return make_error_dict( BAD_REQUEST, 'Empty body request is not supported', 'Atom entry deposit is supposed to send for metadata. ' 'If the body is empty, there is no metadata.') external_id = metadata.get('external_identifier', headers['slug']) deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) self._deposit_request_put( deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, replace_metadata, replace_archives) return { 'deposit_id': deposit.id, 'deposit_date': deposit.reception_date, 'archive': None, 'status': deposit.status, } def _empty_post(self, req, headers, collection_name, deposit_id): """Empty post to finalize an empty deposit. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier Returns: Dictionary of result with the deposit's id, the date it was completed and no archive. """ deposit = Deposit.objects.get(pk=deposit_id) deposit.complete_date = timezone.now() deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return { 'deposit_id': deposit_id, 'deposit_date': deposit.complete_date, 'status': deposit.status, 'archive': None, } def _make_iris(self, req, collection_name, deposit_id): """Define the IRI endpoints Args: req (Request): The initial request collection_name (str): client/collection's name deposit_id (id): Deposit identifier Returns: Dictionary of keys with the iris' urls. """ args = [collection_name, deposit_id] return { iri: req.build_absolute_uri(reverse(iri, args=args)) for iri in [EM_IRI, EDIT_SE_IRI, CONT_FILE_IRI, STATE_IRI] } def additional_checks(self, req, headers, collection_name, deposit_id=None): """Permit the child class to enrich additional checks. Returns: dict with 'error' detailing the problem. """ return {} def checks(self, req, collection_name, deposit_id=None): try: self._collection = DepositCollection.objects.get( name=collection_name) except DepositCollection.DoesNotExist: return make_error_dict( NOT_FOUND, 'Unknown collection name %s' % collection_name) username = req.user.username if username: # unauthenticated request can have the username empty try: self._client = DepositClient.objects.get(username=username) except DepositClient.DoesNotExist: return make_error_dict(NOT_FOUND, 'Unknown client name %s' % username) if self._collection.id not in self._client.collections: return make_error_dict( FORBIDDEN, 'Client %s cannot access collection %s' % ( username, collection_name)) if deposit_id: try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, 'Deposit with id %s does not exist' % deposit_id) checks = self.restrict_access(req, deposit) if checks: return checks headers = self._read_headers(req) if headers['on-behalf-of']: return make_error_dict(MEDIATION_NOT_ALLOWED, 'Mediation is not supported.') checks = self.additional_checks(req, headers, collection_name, deposit_id) if 'error' in checks: return checks return {'headers': headers} def restrict_access(self, req, deposit=None): if deposit: if (req.method != 'GET' and deposit.status != DEPOSIT_STATUS_PARTIAL): summary = "You can only act on deposit with status '%s'" % ( DEPOSIT_STATUS_PARTIAL, ) description = "This deposit has status '%s'" % deposit.status return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description) def _basic_not_allowed_method(self, req, method): return make_error_response( req, METHOD_NOT_ALLOWED, '%s method is not supported on this endpoint' % method) def get(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'GET') def post(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'POST') def put(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'PUT') def delete(self, req, *args, **kwargs): return self._basic_not_allowed_method(req, 'DELETE') class SWHGetDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support GET method. """ def get(self, req, collection_name, deposit_id, format=None): """Endpoint to create/add resources to deposit. Returns: 200 response when no error during routine occurred 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) r = self.process_get( req, collection_name, deposit_id) if isinstance(r, tuple): status, content, content_type = r return HttpResponse(content, status=status, content_type=content_type) return r @abstractmethod def process_get(self, req, collection_name, deposit_id): """Routine to deal with the deposit's get processing. Returns: Tuple status, stream of content, content-type """ pass class SWHPostDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def post(self, req, collection_name, deposit_id=None, format=None): """Endpoint to create/add resources to deposit. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) headers = checks['headers'] _status, _iri_key, data = self.process_post( req, headers, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) data['packagings'] = ACCEPT_PACKAGINGS iris = self._make_iris(req, collection_name, data['deposit_id']) data.update(iris) response = render(req, 'deposit/deposit_receipt.xml', context=data, content_type='application/xml', status=_status) response._headers['location'] = 'Location', data[_iri_key] return response @abstractmethod def process_post(self, req, headers, collection_name, deposit_id=None): """Routine to deal with the deposit's processing. Returns Tuple of: - response status code (200, 201, etc...) - key iri (EM_IRI, EDIT_SE_IRI, etc...) - dictionary of the processing result """ pass class SWHPutDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support PUT method. """ def put(self, req, collection_name, deposit_id, format=None): """Endpoint to update deposit resources. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) headers = checks['headers'] data = self.process_put(req, headers, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_put(self, req, headers, collection_name, deposit_id): """Routine to deal with updating a deposit in some way. Returns dictionary of the processing result """ pass class SWHDeleteDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def delete(self, req, collection_name, deposit_id): """Endpoint to delete some deposit's resources (archives, deposit). Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) data = self.process_delete(req, collection_name, deposit_id) error = data.get('error') if error: return make_error_response_from_dict(req, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_delete(self, req, collection_name, deposit_id): """Routine to delete a resource. This is mostly not allowed except for the EM_IRI (cf. .api.deposit_update.SWHUpdateArchiveDeposit) """ pass diff --git a/swh/deposit/api/deposit_status.py b/swh/deposit/api/deposit_status.py index 240db47d..f800bd41 100644 --- a/swh/deposit/api/deposit_status.py +++ b/swh/deposit/api/deposit_status.py @@ -1,65 +1,55 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render from rest_framework import status from .common import SWHBaseDeposit from .converters import convert_status_detail from ..errors import NOT_FOUND, make_error_response from ..errors import make_error_response_from_dict from ..models import DEPOSIT_STATUS_DETAIL, Deposit class SWHDepositStatus(SWHBaseDeposit): """Deposit status. What's known as 'State IRI' in the sword specification. HTTP verbs supported: GET """ def get(self, req, collection_name, deposit_id, format=None): checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) try: deposit = Deposit.objects.get(pk=deposit_id) if deposit.collection.name != collection_name: raise Deposit.DoesNotExist except Deposit.DoesNotExist: return make_error_response( req, NOT_FOUND, 'deposit %s does not belong to collection %s' % ( deposit_id, collection_name)) status_detail = convert_status_detail(deposit.status_detail) if not status_detail: status_detail = DEPOSIT_STATUS_DETAIL[deposit.status] context = { 'deposit_id': deposit.id, - 'status': deposit.status, 'status_detail': status_detail, - 'swh_id': None, - 'swh_id_context': None, - 'swh_anchor_id': None, - 'swh_anchor_id_context': None, } - - if deposit.swh_id: - context['swh_id'] = deposit.swh_id - if deposit.swh_id_context: - context['swh_id_context'] = deposit.swh_id_context - if deposit.swh_anchor_id: - context['swh_anchor_id'] = deposit.swh_anchor_id - if deposit.swh_anchor_id_context: - context['swh_anchor_id_context'] = deposit.swh_anchor_id_context + keys = ('status', 'swh_id', 'swh_id_context', 'swh_anchor_id', + 'swh_anchor_id_context', 'external_id') + for k in keys: + context[k] = getattr(deposit, k, None) return render(req, 'deposit/status.xml', context=context, content_type='application/xml', status=status.HTTP_200_OK) diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py index b1c5fb98..986a5351 100644 --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -1,51 +1,51 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.deposit import utils from ...config import METADATA_TYPE from ...models import DepositRequest, Deposit class DepositReadMixin: """Deposit Read mixin """ def _deposit_requests(self, deposit, request_type): """Given a deposit, yields its associated deposit_request Args: deposit (Deposit): Deposit to list requests for - request_type (str): Archive or metadata type + request_type (str): 'archive' or 'metadata' Yields: deposit requests of type request_type associated to the deposit """ if isinstance(deposit, int): deposit = Deposit.objects.get(pk=deposit) deposit_requests = DepositRequest.objects.filter( - type=self.deposit_request_types[request_type], + type=request_type, deposit=deposit).order_by('id') for deposit_request in deposit_requests: yield deposit_request def _metadata_get(self, deposit): """Given a deposit, aggregate all metadata requests. Args: deposit (Deposit): The deposit instance to extract metadata from. Returns: metadata dict from the deposit. """ metadata = (m.metadata for m in self._deposit_requests( deposit, request_type=METADATA_TYPE)) return utils.merge(*metadata) diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py index cefcd926..4b170aad 100644 --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -1,246 +1,209 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import re import tarfile import zipfile from rest_framework import status from . import DepositReadMixin from ..common import SWHGetDepositAPI, SWHPrivateAPIView from ...config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED from ...config import ARCHIVE_TYPE from ...models import Deposit MANDATORY_FIELDS_MISSING = 'Mandatory fields are missing' ALTERNATE_FIELDS_MISSING = 'Mandatory alternate fields are missing' -INCOMPATIBLE_URL_FIELDS = "At least one url field must be compatible with the client's domain name" # noqa MANDATORY_ARCHIVE_UNREADABLE = 'At least one of its associated archives is not readable' # noqa MANDATORY_ARCHIVE_INVALID = 'Mandatory archive is invalid (i.e contains only one archive)' # noqa MANDATORY_ARCHIVE_UNSUPPORTED = 'Mandatory archive type is not supported' MANDATORY_ARCHIVE_MISSING = 'Deposit without archive is rejected' ARCHIVE_EXTENSIONS = [ 'zip', 'tar', 'tar.gz', 'xz', 'tar.xz', 'bz2', 'tar.bz2', 'Z', 'tar.Z', 'tgz', '7z' ] PATTERN_ARCHIVE_EXTENSION = re.compile( r'.*\.(%s)$' % '|'.join(ARCHIVE_EXTENSIONS)) class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ def _check_deposit_archives(self, deposit): """Given a deposit, check each deposit request of type archive. Args: The deposit to check archives for Returns tuple (status, error_detail): True, None if all archives are ok, (False, ) otherwise. """ requests = list(self._deposit_requests( deposit, request_type=ARCHIVE_TYPE)) if len(requests) == 0: # no associated archive is refused return False, { 'archive': [{ 'summary': MANDATORY_ARCHIVE_MISSING, }] } errors = [] for archive_request in requests: check, error_message = self._check_archive(archive_request) if not check: errors.append({ 'summary': error_message, 'fields': [archive_request.id] }) if not errors: return True, None return False, { 'archive': errors } def _check_archive(self, archive_request): """Check that a deposit associated archive is ok: - readable - supported archive format - valid content: the archive does not contain a single archive file If any of those checks are not ok, return the corresponding failing check. Args: archive_path (DepositRequest): Archive to check Returns: (True, None) if archive is check compliant, (False, ) otherwise. """ archive_path = archive_request.archive.path try: if zipfile.is_zipfile(archive_path): with zipfile.ZipFile(archive_path) as f: files = f.namelist() elif tarfile.is_tarfile(archive_path): with tarfile.open(archive_path) as f: files = f.getnames() else: return False, MANDATORY_ARCHIVE_UNSUPPORTED except Exception: return False, MANDATORY_ARCHIVE_UNREADABLE if len(files) > 1: return True, None element = files[0] if PATTERN_ARCHIVE_EXTENSION.match(element): # archive in archive! return False, MANDATORY_ARCHIVE_INVALID return True, None def _check_metadata(self, metadata): """Check to execute on all metadata for mandatory field presence. Args: metadata (dict): Metadata dictionary to check for mandatory fields Returns: tuple (status, error_detail): True, None if metadata are ok (False, ) otherwise. """ required_fields = { - 'url': False, - 'external_identifier': False, 'author': False, } alternate_fields = { ('name', 'title'): False, # alternate field, at least one # of them must be present } for field, value in metadata.items(): for name in required_fields: if name in field: required_fields[name] = True for possible_names in alternate_fields: for possible_name in possible_names: if possible_name in field: alternate_fields[possible_names] = True continue mandatory_result = [k for k, v in required_fields.items() if not v] optional_result = [ ' or '.join(k) for k, v in alternate_fields.items() if not v] if mandatory_result == [] and optional_result == []: return True, None detail = [] if mandatory_result != []: detail.append({ 'summary': MANDATORY_FIELDS_MISSING, 'fields': mandatory_result }) if optional_result != []: detail.append({ 'summary': ALTERNATE_FIELDS_MISSING, 'fields': optional_result, }) return False, { 'metadata': detail } - def _check_url(self, client_domain, metadata): - """Check compatibility between client_domain and url field in metadata - - Args: - client_domain (str): url associated with the deposit's client - metadata (dict): Metadata where to find url - - Returns: - tuple (status, error_detail): True, None if url associated - with the deposit's client is ok, (False, - ) otherwise. - - """ - url_fields = [] - for field in metadata: - if 'url' in field: - if client_domain in metadata[field]: - return True, None - url_fields.append(field) - - detail = { - 'url': { - 'summary': INCOMPATIBLE_URL_FIELDS, - } - } - if url_fields: - detail['url']['fields'] = url_fields - return False, detail - def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ deposit = Deposit.objects.get(pk=deposit_id) - client_domain = deposit.client.domain metadata = self._metadata_get(deposit) problems = {} # will check each deposit's associated request (both of type # archive and metadata) for errors archives_status, error_detail = self._check_deposit_archives(deposit) if not archives_status: problems.update(error_detail) metadata_status, error_detail = self._check_metadata(metadata) if not metadata_status: problems.update(error_detail) - url_status, error_detail = self._check_url(client_domain, metadata) - if not url_status: - problems.update(error_detail) - - deposit_status = archives_status and metadata_status and url_status + deposit_status = archives_status and metadata_status # if any problems arose, the deposit is rejected if not deposit_status: deposit.status = DEPOSIT_STATUS_REJECTED deposit.status_detail = problems response = { 'status': deposit.status, 'details': deposit.status_detail, } else: deposit.status = DEPOSIT_STATUS_VERIFIED response = { 'status': deposit.status, } deposit.save() return status.HTTP_200_OK, json.dumps(response), 'application/json' diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py index 6d28f106..1df08f36 100644 --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -1,209 +1,234 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import os import shutil import tempfile from contextlib import contextmanager from django.http import FileResponse from rest_framework import status from swh.core import tarball from swh.model import identifiers +from swh.deposit.utils import normalize_date +from swh.deposit import utils from . import DepositReadMixin from ...config import SWH_PERSON, ARCHIVE_TYPE from ..common import SWHGetDepositAPI, SWHPrivateAPIView from ...models import Deposit @contextmanager def aggregate_tarballs(extraction_dir, archive_paths): """Aggregate multiple tarballs into one and returns this new archive's path. Args: extraction_dir (path): Path to use for the tarballs computation archive_paths ([str]): Deposit's archive paths Returns: Tuple (directory to clean up, archive path (aggregated or not)) """ if len(archive_paths) > 1: # need to rebuild one archive from multiple ones os.makedirs(extraction_dir, 0o755, exist_ok=True) dir_path = tempfile.mkdtemp(prefix='swh.deposit-', dir=extraction_dir) # root folder to build an aggregated tarball aggregated_tarball_rootdir = os.path.join(dir_path, 'aggregate') os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True) # uncompress in a temporary location all archives for archive_path in archive_paths: tarball.uncompress(archive_path, aggregated_tarball_rootdir) # Aggregate into one big tarball the multiple smaller ones temp_tarpath = tarball.compress( aggregated_tarball_rootdir + '.zip', nature='zip', dirpath_or_files=aggregated_tarball_rootdir) # can already clean up temporary directory shutil.rmtree(aggregated_tarball_rootdir) try: yield temp_tarpath finally: shutil.rmtree(dir_path) else: # only 1 archive, no need to do fancy actions (and no cleanup step) yield archive_paths[0] class SWHDepositReadArchives(SWHGetDepositAPI, SWHPrivateAPIView, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ ADDITIONAL_CONFIG = { 'extraction_dir': ('str', '/tmp/swh-deposit/archive/'), } def __init__(self): super().__init__() self.extraction_dir = self.config['extraction_dir'] if not os.path.exists(self.extraction_dir): os.makedirs(self.extraction_dir) def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ archive_paths = [r.archive.path for r in self._deposit_requests( deposit_id, request_type=ARCHIVE_TYPE)] with aggregate_tarballs(self.extraction_dir, archive_paths) as path: return FileResponse(open(path, 'rb'), status=status.HTTP_200_OK, content_type='application/octet-stream') class SWHDepositReadMetadata(SWHGetDepositAPI, SWHPrivateAPIView, DepositReadMixin): """Class in charge of aggregating metadata on a deposit. """ ADDITIONAL_CONFIG = { 'provider': ('dict', { # 'provider_name': '', # those are not set since read from the # 'provider_url': '', # deposit's client 'provider_type': 'deposit_client', 'metadata': {} }), 'tool': ('dict', { 'name': 'swh-deposit', 'version': '0.0.1', 'configuration': { 'sword_version': '2' } }) } def __init__(self): super().__init__() self.provider = self.config['provider'] self.tool = self.config['tool'] - def _retrieve_url(self, deposit, metadata): - client_domain = deposit.client.domain - for field in metadata: - if 'url' in field: - if client_domain in metadata[field]: - return metadata[field] + def _normalize_dates(self, deposit, metadata): + """Normalize the date to use as a tuple of author date, committer date + from the incoming metadata. + + Args: + deposit (Deposit): Deposit model representation + metadata (Dict): Metadata dict representation + + Returns: + Tuple of author date, committer date. Those dates are + swh normalized. + + """ + commit_date = metadata.get('codemeta:datePublished') + author_date = metadata.get('codemeta:dateCreated') + + if author_date and commit_date: + pass + elif commit_date: + author_date = commit_date + elif author_date: + commit_date = author_date + else: + author_date = deposit.complete_date + commit_date = deposit.complete_date + return ( + normalize_date(author_date), + normalize_date(commit_date) + ) def metadata_read(self, deposit): """Read and aggregate multiple data on deposit into one unified data dictionary. Args: deposit (Deposit): Deposit concerned by the data aggregation. Returns: Dictionary of data representing the deposit to inject in swh. """ - data = {} metadata = self._metadata_get(deposit) - # create origin_url from metadata only after deposit_check validates it - origin_url = self._retrieve_url(deposit, metadata) # Read information metadata - data['origin'] = { - 'type': 'deposit', - 'url': origin_url + data = { + 'origin': { + 'type': 'deposit', + 'url': utils.origin_url_from(deposit), + } } # revision fullname = deposit.client.username author_committer = SWH_PERSON # metadata provider self.provider['provider_name'] = deposit.client.last_name self.provider['provider_url'] = deposit.client.provider_url revision_type = 'tar' revision_msg = '%s: Deposit %s in collection %s' % ( fullname, deposit.id, deposit.collection.name) - complete_date = identifiers.normalize_timestamp(deposit.complete_date) + + author_date, commit_date = self._normalize_dates(deposit, metadata) data['revision'] = { 'synthetic': True, - 'date': complete_date, - 'committer_date': complete_date, + 'date': author_date, + 'committer_date': commit_date, 'author': author_committer, 'committer': author_committer, 'type': revision_type, 'message': revision_msg, 'metadata': metadata, } if deposit.parent: swh_persistent_id = deposit.parent.swh_id persistent_identifier = identifiers.parse_persistent_identifier( swh_persistent_id) parent_revision = persistent_identifier.object_id data['revision']['parents'] = [parent_revision] data['branch_name'] = 'master' data['origin_metadata'] = { 'provider': self.provider, 'tool': self.tool, 'metadata': metadata } return data def process_get(self, req, collection_name, deposit_id): deposit = Deposit.objects.get(pk=deposit_id) data = self.metadata_read(deposit) d = {} if data: d = json.dumps(data) return status.HTTP_200_OK, d, 'application/json' diff --git a/swh/deposit/api/service_document.py b/swh/deposit/api/service_document.py index 0b04103a..9b79065c 100644 --- a/swh/deposit/api/service_document.py +++ b/swh/deposit/api/service_document.py @@ -1,33 +1,33 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render -from django.core.urlresolvers import reverse +from django.urls import reverse from .common import SWHBaseDeposit, ACCEPT_PACKAGINGS from .common import ACCEPT_ARCHIVE_CONTENT_TYPES from ..config import COL_IRI from ..models import DepositClient, DepositCollection class SWHServiceDocument(SWHBaseDeposit): def get(self, req, *args, **kwargs): client = DepositClient.objects.get(username=req.user) collections = {} for col_id in client.collections: col = DepositCollection.objects.get(pk=col_id) col_uri = req.build_absolute_uri(reverse(COL_IRI, args=[col.name])) collections[col.name] = col_uri context = { 'max_upload_size': self.config['max_upload_size'], 'accept_packagings': ACCEPT_PACKAGINGS, 'accept_content_types': ACCEPT_ARCHIVE_CONTENT_TYPES, 'collections': collections, } return render(req, 'deposit/service_document.xml', context, content_type='application/xml') diff --git a/swh/deposit/cli/__init__.py b/swh/deposit/cli/__init__.py new file mode 100644 index 00000000..5a209626 --- /dev/null +++ b/swh/deposit/cli/__init__.py @@ -0,0 +1,37 @@ +# Copyright (C) 2017-2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import click +import logging + +logger = logging.getLogger(__name__) + + +CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help']) + + +@click.group(context_settings=CONTEXT_SETTINGS) +@click.option('--log-level', '-l', default='INFO', + type=click.Choice(logging._nameToLevel.keys()), + help="Log level (default to INFO)") +@click.pass_context +def cli(ctx, log_level): + logger.setLevel(log_level) + ctx.ensure_object(dict) + + +def main(): + logging.basicConfig() + from . import deposit # noqa + try: + from . import admin # noqa + except ImportError: # server part is optional + pass + + return cli(auto_envvar_prefix='SWH_DEPOSIT') + + +if __name__ == '__main__': + main() diff --git a/swh/deposit/cli/admin.py b/swh/deposit/cli/admin.py new file mode 100644 index 00000000..9b918335 --- /dev/null +++ b/swh/deposit/cli/admin.py @@ -0,0 +1,178 @@ +# Copyright (C) 2017-2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import click + +from swh.deposit.config import setup_django_for +from swh.deposit.cli import cli + + +@cli.group('admin') +@click.option('--config-file', '-C', default=None, + type=click.Path(exists=True, dir_okay=False,), + help="Optional extra configuration file.") +@click.option('--platform', default='development', + type=click.Choice(['development', 'production']), + help='development or production platform') +@click.pass_context +def admin(ctx, config_file, platform): + """Server administration tasks (manipulate user or collections)""" + # configuration happens here + setup_django_for(platform, config_file=config_file) + + +@admin.group('user') +@click.pass_context +def user(ctx): + """Manipulate user.""" + # configuration happens here + pass + + +def _create_collection(name): + """Create the collection with name if it does not exist. + + Args: + name (str): collection's name + + Returns: + collection (DepositCollection): the existing collection object + (created or not) + + """ + # to avoid loading too early django namespaces + from swh.deposit.models import DepositCollection + + try: + collection = DepositCollection.objects.get(name=name) + click.echo('Collection %s exists, nothing to do.' % name) + except DepositCollection.DoesNotExist: + click.echo('Create new collection %s' % name) + collection = DepositCollection.objects.create(name=name) + click.echo('Collection %s created' % name) + return collection + + +@user.command('create') +@click.option('--username', required=True, help="User's name") +@click.option('--password', required=True, + help="Desired user's password (plain).") +@click.option('--firstname', default='', help="User's first name") +@click.option('--lastname', default='', help="User's last name") +@click.option('--email', default='', help="User's email") +@click.option('--collection', help="User's collection") +@click.option('--provider-url', default='', help="Provider URL") +@click.option('--domain', help="The domain") +@click.pass_context +def user_create(ctx, username, password, firstname, lastname, email, + collection, provider_url, domain): + """Create a user with some needed information (password, collection) + + If the collection does not exist, the collection is then created + alongside. + + The password is stored encrypted using django's utilies. + + """ + # to avoid loading too early django namespaces + from swh.deposit.models import DepositClient + + # If collection is not provided, fallback to username + if not collection: + collection = username + click.echo('collection: %s' % collection) + # create the collection if it does not exist + collection = _create_collection(collection) + + # user create/update + try: + user = DepositClient.objects.get(username=username) + click.echo('User %s exists, updating information.' % user) + user.set_password(password) + except DepositClient.DoesNotExist: + click.echo('Create new user %s' % username) + user = DepositClient.objects.create_user( + username=username, + password=password) + + user.collections = [collection.id] + user.first_name = firstname + user.last_name = lastname + user.email = email + user.is_active = True + user.provider_url = provider_url + user.domain = domain + user.save() + + click.echo('Information registered for user %s' % user) + + +@user.command('list') +@click.pass_context +def user_list(ctx): + """List existing users. + + This entrypoint is not paginated yet as there is not a lot of + entry. + + """ + # to avoid loading too early django namespaces + from swh.deposit.models import DepositClient + users = DepositClient.objects.all() + if not users: + output = 'Empty user list' + else: + output = '\n'.join((user.username for user in users)) + click.echo(output) + + +@user.command('exists') +@click.argument('username', required=True) +@click.pass_context +def user_exists(ctx, username): + """Check if user exists. + """ + # to avoid loading too early django namespaces + from swh.deposit.models import DepositClient + try: + DepositClient.objects.get(username=username) + click.echo('User %s exists.' % username) + ctx.exit(0) + except DepositClient.DoesNotExist: + click.echo('User %s does not exists.' % username) + ctx.exit(1) + + +@admin.group('collection') +@click.pass_context +def collection(ctx): + """Manipulate collections.""" + pass + + +@collection.command('create') +@click.option('--name', required=True, help="Collection's name") +@click.pass_context +def collection_create(ctx, name): + _create_collection(name) + + +@collection.command('list') +@click.pass_context +def collection_list(ctx): + """List existing collections. + + This entrypoint is not paginated yet as there is not a lot of + entry. + + """ + # to avoid loading too early django namespaces + from swh.deposit.models import DepositCollection + collections = DepositCollection.objects.all() + if not collections: + output = 'Empty collection list' + else: + output = '\n'.join((col.name for col in collections)) + click.echo(output) diff --git a/swh/deposit/cli/deposit.py b/swh/deposit/cli/deposit.py new file mode 100644 index 00000000..d8db2592 --- /dev/null +++ b/swh/deposit/cli/deposit.py @@ -0,0 +1,353 @@ +# Copyright (C) 2017-2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import os +import logging +import tempfile +import uuid + +import click +import xmltodict + +from swh.deposit.client import PublicApiDepositClient +from swh.deposit.cli import cli + + +logger = logging.getLogger(__name__) + + +class InputError(ValueError): + """Input script error + + """ + pass + + +def generate_slug(): + """Generate a slug (sample purposes). + + """ + return str(uuid.uuid4()) + + +def generate_metadata_file(name, external_id, authors): + """Generate a temporary metadata file with the minimum required metadata + + This generates a xml file in a temporary location and returns the + path to that file. + + This is up to the client of that function to clean up the + temporary file. + + Args: + name (str): Software's name + external_id (str): External identifier (slug) or generated one + authors (List[str]): List of author names + + Returns: + Filepath to the metadata generated file + + """ + _, tmpfile = tempfile.mkstemp(prefix='swh.deposit.cli.') + + # generate a metadata file with the minimum required metadata + codemetadata = { + 'entry': { + '@xmlns': "http://www.w3.org/2005/Atom", + '@xmlns:codemeta': "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0", + 'codemeta:name': name, + 'codemeta:identifier': external_id, + 'codemeta:author': [{ + 'codemeta:name': author_name + } for author_name in authors], + }, + } + + logging.debug('Temporary file: %s', tmpfile) + logging.debug('Metadata dict to generate as xml: %s', codemetadata) + s = xmltodict.unparse(codemetadata, pretty=True) + logging.debug('Metadata dict as xml generated: %s', s) + with open(tmpfile, 'w') as fp: + fp.write(s) + return tmpfile + + +def _cleanup_tempfile(config): + """Clean up the temporary metadata file generated. + + Args: + + config (Dict): A configuration dict with 2 important keys for + that routine, 'cleanup_tempfile' (bool) and 'metadata' (path + to eventually clean up) + + """ + if config['cleanup_tempfile']: + path = config['metadata'] + if os.path.exists(path): + os.unlink(path) + + +def client_command_parse_input( + username, password, archive, metadata, + archive_deposit, metadata_deposit, + collection, slug, partial, deposit_id, replace, + url, status, name, authors): + """Parse the client subcommand options and make sure the combination + is acceptable*. If not, an InputError exception is raised + explaining the issue. + + By acceptable, we mean: + + - A multipart deposit (create or update) needs both an + existing software archive and an existing metadata file + + - A binary deposit (create/update) needs an existing + software archive + + - A metadata deposit (create/update) needs an existing + metadata file + + - A deposit update needs a deposit_id to be provided + + This won't prevent all failure cases though. The remaining + errors are already dealt with the underlying api client. + + Raises: + InputError explaining the issue + + Returns: + dict with the following keys: + + 'archive': the software archive to deposit + 'username': username + 'password': associated password + 'metadata': the metadata file to deposit + 'collection': the username's associated client + 'slug': the slug or external id identifying the deposit to make + 'partial': if the deposit is partial or not + 'client': instantiated class + 'url': deposit's server main entry point + 'deposit_type': deposit's type (binary, multipart, metadata) + 'deposit_id': optional deposit identifier + + """ + cleanup_tempfile = False + + try: + if status and not deposit_id: + raise InputError("Deposit id must be provided for status check") + + if status and deposit_id: # status is higher priority over deposit + archive_deposit = False + metadata_deposit = False + archive = None + metadata = None + + if archive_deposit and metadata_deposit: + # too many flags use, remove redundant ones (-> multipart deposit) + archive_deposit = False + metadata_deposit = False + + if archive and not os.path.exists(archive): + raise InputError('Software Archive %s must exist!' % archive) + + if not slug: # generate one as this is mandatory + slug = generate_slug() + + if archive and not metadata: # we need to have the metadata + if name and authors: + metadata = generate_metadata_file(name, slug, authors) + cleanup_tempfile = True + else: + raise InputError('Either metadata deposit file or (`--name` ' + ' and `--author`) fields must be provided') + + if metadata_deposit: + archive = None + + if archive_deposit: + metadata = None + + if metadata_deposit and not metadata: + raise InputError( + "Metadata deposit filepath must be provided for metadata " + "deposit") + + if metadata and not os.path.exists(metadata): + raise InputError('Software Archive metadata %s must exist!' % ( + metadata, )) + + if not status and not archive and not metadata: + raise InputError( + 'Please provide an actionable command. See --help for more ' + 'information.') + + if replace and not deposit_id: + raise InputError( + 'To update an existing deposit, you must provide its id') + + client = PublicApiDepositClient({ + 'url': url, + 'auth': { + 'username': username, + 'password': password + }, + }) + + if not collection: + # retrieve user's collection + sd_content = client.service_document() + if 'error' in sd_content: + raise InputError('Service document retrieval: %s' % ( + sd_content['error'], )) + collection = sd_content[ + 'service']['workspace']['collection']['sword:name'] + + return { + 'archive': archive, + 'username': username, + 'password': password, + 'metadata': metadata, + 'cleanup_tempfile': cleanup_tempfile, + 'collection': collection, + 'slug': slug, + 'in_progress': partial, + 'client': client, + 'url': url, + 'deposit_id': deposit_id, + 'replace': replace, + } + except Exception: # to be clean, cleanup prior to raise + _cleanup_tempfile({ + 'cleanup_tempfile': cleanup_tempfile, + 'metadata': metadata + }) + raise + + +def _subdict(d, keys): + 'return a dict from d with only given keys' + return {k: v for k, v in d.items() if k in keys} + + +def deposit_status(config, logger): + logger.debug('Status deposit') + keys = ('collection', 'deposit_id') + client = config['client'] + return client.deposit_status( + **_subdict(config, keys)) + + +def deposit_create(config, logger): + """Delegate the actual deposit to the deposit client. + + """ + logger.debug('Create deposit') + + client = config['client'] + keys = ('collection', 'archive', 'metadata', 'slug', 'in_progress') + return client.deposit_create( + **_subdict(config, keys)) + + +def deposit_update(config, logger): + """Delegate the actual deposit to the deposit client. + + """ + logger.debug('Update deposit') + + client = config['client'] + keys = ('collection', 'deposit_id', 'archive', 'metadata', + 'slug', 'in_progress', 'replace') + return client.deposit_update( + **_subdict(config, keys)) + + +@cli.command() +@click.option('--username', required=1, + help="(Mandatory) User's name") +@click.option('--password', required=1, + help="(Mandatory) User's associated password") +@click.option('--archive', + help='(Optional) Software archive to deposit') +@click.option('--metadata', + help="(Optional) Path to xml metadata file. If not provided, this will use a file named .metadata.xml") # noqa +@click.option('--archive-deposit/--no-archive-deposit', default=False, + help='(Optional) Software archive only deposit') +@click.option('--metadata-deposit/--no-metadata-deposit', default=False, + help='(Optional) Metadata only deposit') +@click.option('--collection', + help="(Optional) User's collection. If not provided, this will be fetched.") # noqa +@click.option('--slug', + help="""(Optional) External system information identifier. If not provided, it will be generated""") # noqa +@click.option('--partial/--no-partial', default=False, + help='(Optional) The deposit will be partial, other deposits will have to take place to finalize it.') # noqa +@click.option('--deposit-id', default=None, + help='(Optional) Update an existing partial deposit with its identifier') # noqa +@click.option('--replace/--no-replace', default=False, + help='(Optional) Update by replacing existing metadata to a deposit') # noqa +@click.option('--url', default='https://deposit.softwareheritage.org/1', + help="(Optional) Deposit server api endpoint. By default, https://deposit.softwareheritage.org/1") # noqa +@click.option('--status/--no-status', default=False, + help="(Optional) Deposit's status") +@click.option('--verbose/--no-verbose', default=False, + help='Verbose mode') +@click.option('--name', + help='Software name') +@click.option('--author', multiple=True, + help='Software author(s), this can be repeated as many times' + ' as there are authors') +@click.pass_context +def deposit(ctx, + username, password, archive=None, metadata=None, + archive_deposit=False, metadata_deposit=False, + collection=None, slug=None, partial=False, deposit_id=None, + replace=False, status=False, + url='https://deposit.softwareheritage.org/1', + verbose=False, name=None, author=None): + """Software Heritage Public Deposit Client + + Create/Update deposit through the command line or access its + status. + +More documentation can be found at +https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html. + + """ + config = {} + + try: + logger.debug('Parsing cli options') + config = client_command_parse_input( + username, password, archive, metadata, archive_deposit, + metadata_deposit, collection, slug, partial, deposit_id, + replace, url, status, name, author) + except InputError as e: + msg = 'Problem during parsing options: %s' % e + r = { + 'error': msg, + } + logger.info(r) + return 1 + + try: + if verbose: + logger.info("Parsed configuration: %s" % ( + config, )) + + deposit_id = config['deposit_id'] + + if status and deposit_id: + r = deposit_status(config, logger) + elif not status and deposit_id: + r = deposit_update(config, logger) + elif not status and not deposit_id: + r = deposit_create(config, logger) + + logger.info(r) + + finally: + _cleanup_tempfile(config) diff --git a/swh/deposit/client/__init__.py b/swh/deposit/client/__init__.py index a1296b10..6f0d2e19 100644 --- a/swh/deposit/client/__init__.py +++ b/swh/deposit/client/__init__.py @@ -1,574 +1,577 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of defining an swh-deposit client """ import hashlib import os import requests import xmltodict +import logging from abc import ABCMeta, abstractmethod from swh.core.config import SWHConfig +logger = logging.getLogger(__name__) + + def _parse(stream, encoding='utf-8'): """Given a xml stream, parse the result. Args: stream (bytes/text): The stream to parse encoding (str): The encoding to use if to decode the bytes stream Returns: A dict of values corresponding to the parsed xml """ if isinstance(stream, bytes): stream = stream.decode(encoding) data = xmltodict.parse(stream, encoding=encoding, process_namespaces=False) if 'entry' in data: data = data['entry'] return dict(data) def _parse_with_filter(stream, encoding='utf-8', keys=[]): """Given a xml stream, parse the result and filter with keys. Args: stream (bytes/text): The stream to parse encoding (str): The encoding to use if to decode the bytes stream keys ([str]): Keys to filter the parsed result Returns: A dict of values corresponding to the parsed xml filtered by the keys provided. """ data = _parse(stream, encoding=encoding) m = {} for key in keys: m[key] = data.get(key) return m class BaseApiDepositClient(SWHConfig): """Deposit client base class """ CONFIG_BASE_FILENAME = 'deposit/client' DEFAULT_CONFIG = { 'url': ('str', 'http://localhost:5006'), 'auth': ('dict', {}), # with optional 'username'/'password' keys } def __init__(self, config=None, _client=requests): super().__init__() if config is None: self.config = super().parse_config_file() else: self.config = config self._client = _client self.base_url = self.config['url'] auth = self.config['auth'] if auth == {}: self.auth = None else: self.auth = (auth['username'], auth['password']) def do(self, method, url, *args, **kwargs): """Internal method to deal with requests, possibly with basic http authentication. Args: method (str): supported http methods as in self._methods' keys Returns: The request's execution """ if hasattr(self._client, method): method_fn = getattr(self._client, method) else: raise ValueError('Development error, unsupported method %s' % ( method)) if self.auth: kwargs['auth'] = self.auth full_url = '%s%s' % (self.base_url.rstrip('/'), url) return method_fn(full_url, *args, **kwargs) class PrivateApiDepositClient(BaseApiDepositClient): """Private API deposit client to: - read a given deposit's archive(s) - read a given deposit's metadata - update a given deposit's status """ - def archive_get(self, archive_update_url, archive_path, log=None): + def archive_get(self, archive_update_url, archive): """Retrieve the archive from the deposit to a local directory. Args: archive_update_url (str): The full deposit archive(s)'s raw content to retrieve locally - archive_path (str): the local archive's path where to store + archive (str): the local archive's path where to store the raw content Returns: The archive path to the local archive to load. Or None if any problem arose. """ r = self.do('get', archive_update_url, stream=True) if r.ok: - with open(archive_path, 'wb') as f: + with open(archive, 'wb') as f: for chunk in r.iter_content(): f.write(chunk) - return archive_path + return archive msg = 'Problem when retrieving deposit archive at %s' % ( archive_update_url, ) - if log: - log.error(msg) + logger.error(msg) raise ValueError(msg) - def metadata_get(self, metadata_url, log=None): + def metadata_get(self, metadata_url): """Retrieve the metadata information on a given deposit. Args: metadata_url (str): The full deposit metadata url to retrieve locally Returns: The dictionary of metadata for that deposit or None if any problem arose. """ r = self.do('get', metadata_url) if r.ok: return r.json() msg = 'Problem when retrieving metadata at %s' % metadata_url - if log: - log.error(msg) + logger.error(msg) raise ValueError(msg) def status_update(self, update_status_url, status, revision_id=None, directory_id=None, origin_url=None): """Update the deposit's status. Args: update_status_url (str): the full deposit's archive status (str): The status to update the deposit with revision_id (str/None): the revision's identifier to update to directory_id (str/None): the directory's identifier to update to origin_url (str/None): deposit's associated origin url """ payload = {'status': status} if revision_id: payload['revision_id'] = revision_id if directory_id: payload['directory_id'] = directory_id if origin_url: payload['origin_url'] = origin_url self.do('put', update_status_url, json=payload) - def check(self, check_url, log=None): + def check(self, check_url): """Check the deposit's associated data (metadata, archive(s)) Args: check_url (str): the full deposit's check url """ r = self.do('get', check_url) if r.ok: data = r.json() return data['status'] msg = 'Problem when checking deposit %s' % check_url - if log: - log.error(msg) + logger.error(msg) raise ValueError(msg) class BaseDepositClient(BaseApiDepositClient, metaclass=ABCMeta): """Base Deposit client to access the public api. """ def __init__(self, config, error_msg=None, empty_result={}): super().__init__(config) self.error_msg = error_msg self.empty_result = empty_result @abstractmethod def compute_url(self, *args, **kwargs): """Compute api url endpoint to query.""" pass @abstractmethod def compute_method(self, *args, **kwargs): """Http method to use on the url""" pass @abstractmethod def parse_result_ok(self, xml_content): """Given an xml result from the api endpoint, parse it and returns a dict. """ pass def compute_information(self, *args, **kwargs): """Compute some more information given the inputs (e.g http headers, ...) """ return {} def parse_result_error(self, xml_content): """Given an error response in xml, parse it into a dict. Returns: dict with following keys: 'error': The error message 'detail': Some more detail about the error if any """ return _parse_with_filter(xml_content, keys=['summary', 'detail']) def do_execute(self, method, url, info): """Execute the http query to url using method and info information. By default, execute a simple query to url with the http method. Override this in daughter class to improve the default behavior if needed. """ return self.do(method, url) def execute(self, *args, **kwargs): """Main endpoint to prepare and execute the http query to the api. """ url = self.compute_url(*args, **kwargs) method = self.compute_method(*args, **kwargs) info = self.compute_information(*args, **kwargs) try: r = self.do_execute(method, url, info) except Exception as e: msg = self.error_msg % (url, e) r = self.empty_result r.update({ 'error': msg, }) return r else: if r.ok: if int(r.status_code) == 204: # 204 returns no body return {'status': r.status_code} else: return self.parse_result_ok(r.text) else: error = self.parse_result_error(r.text) empty = self.empty_result error.update(empty) error.update({ 'status': r.status_code, }) return error class ServiceDocumentDepositClient(BaseDepositClient): """Service Document information retrieval. """ def __init__(self, config): super().__init__(config, error_msg='Service document failure at %s: %s', empty_result={'collection': None}) def compute_url(self, *args, **kwargs): return '/servicedocument/' def compute_method(self, *args, **kwargs): return 'get' def parse_result_ok(self, xml_content): """Parse service document's success response. """ - return _parse_with_filter(xml_content, keys=['collection']) + return _parse(xml_content) class StatusDepositClient(BaseDepositClient): """Status information on a deposit. """ def __init__(self, config): super().__init__(config, error_msg='Status check failure at %s: %s', empty_result={ 'deposit_status': None, 'deposit_status_detail': None, 'deposit_swh_id': None, }) def compute_url(self, collection, deposit_id): return '/%s/%s/status/' % (collection, deposit_id) def compute_method(self, *args, **kwargs): return 'get' def parse_result_ok(self, xml_content): """Given an xml content as string, returns a deposit dict. """ return _parse_with_filter(xml_content, keys=[ 'deposit_id', 'deposit_status', 'deposit_status_detail', 'deposit_swh_id', 'deposit_swh_id_context', 'deposit_swh_anchor_id', - 'deposit_swh_anchor_id_context']) + 'deposit_swh_anchor_id_context', + 'deposit_external_id', + ]) class BaseCreateDepositClient(BaseDepositClient): """Deposit client base class to post new deposit. """ def __init__(self, config): super().__init__(config, error_msg='Post Deposit failure at %s: %s', empty_result={ 'deposit_id': None, 'deposit_status': None, }) def compute_url(self, collection, *args, **kwargs): return '/%s/' % collection def compute_method(self, *args, **kwargs): return 'post' def parse_result_ok(self, xml_content): """Given an xml content as string, returns a deposit dict. """ return _parse_with_filter(xml_content, keys=['deposit_id', 'deposit_status', 'deposit_status_detail', 'deposit_date']) def _compute_information(self, collection, filepath, in_progress, slug, is_archive=True): """Given a filepath, compute necessary information on that file. Args: filepath (str): Path to a file is_archive (bool): is it an archive or not? Returns: dict with keys: 'content-type': content type associated 'md5sum': md5 sum 'filename': filename """ filename = os.path.basename(filepath) if is_archive: md5sum = hashlib.md5(open(filepath, 'rb').read()).hexdigest() extension = filename.split('.')[-1] if 'zip' in extension: content_type = 'application/zip' else: content_type = 'application/x-tar' else: content_type = None md5sum = None return { 'slug': slug, 'in_progress': in_progress, 'content-type': content_type, 'md5sum': md5sum, 'filename': filename, 'filepath': filepath, } def compute_information(self, collection, filepath, in_progress, slug, is_archive=True, **kwargs): info = self._compute_information(collection, filepath, in_progress, slug, is_archive=is_archive) info['headers'] = self.compute_headers(info) return info def do_execute(self, method, url, info): with open(info['filepath'], 'rb') as f: return self.do(method, url, data=f, headers=info['headers']) class CreateArchiveDepositClient(BaseCreateDepositClient): """Post an archive (binary) deposit client.""" def compute_headers(self, info): return { 'SLUG': info['slug'], 'CONTENT_MD5': info['md5sum'], 'IN-PROGRESS': str(info['in_progress']), 'CONTENT-TYPE': info['content-type'], 'CONTENT-DISPOSITION': 'attachment; filename=%s' % ( info['filename'], ), } class UpdateArchiveDepositClient(CreateArchiveDepositClient): """Update (add/replace) an archive (binary) deposit client.""" def compute_url(self, collection, *args, deposit_id=None, **kwargs): return '/%s/%s/media/' % (collection, deposit_id) def compute_method(self, *args, replace=False, **kwargs): return 'put' if replace else 'post' class CreateMetadataDepositClient(BaseCreateDepositClient): """Post a metadata deposit client.""" def compute_headers(self, info): return { 'SLUG': info['slug'], 'IN-PROGRESS': str(info['in_progress']), 'CONTENT-TYPE': 'application/atom+xml;type=entry', } class UpdateMetadataDepositClient(CreateMetadataDepositClient): """Update (add/replace) a metadata deposit client.""" def compute_url(self, collection, *args, deposit_id=None, **kwargs): return '/%s/%s/metadata/' % (collection, deposit_id) def compute_method(self, *args, replace=False, **kwargs): return 'put' if replace else 'post' class CreateMultipartDepositClient(BaseCreateDepositClient): """Create a multipart deposit client.""" def _multipart_info(self, info, info_meta): files = [ ('file', (info['filename'], open(info['filepath'], 'rb'), info['content-type'])), ('atom', (info_meta['filename'], open(info_meta['filepath'], 'rb'), 'application/atom+xml')), ] headers = { 'SLUG': info['slug'], 'CONTENT_MD5': info['md5sum'], 'IN-PROGRESS': str(info['in_progress']), } return files, headers - def compute_information(self, collection, archive_path, metadata_path, + def compute_information(self, collection, archive, metadata, in_progress, slug, **kwargs): info = self._compute_information( - collection, archive_path, in_progress, slug) + collection, archive, in_progress, slug) info_meta = self._compute_information( - collection, metadata_path, in_progress, slug, is_archive=False) + collection, metadata, in_progress, slug, is_archive=False) files, headers = self._multipart_info(info, info_meta) return {'files': files, 'headers': headers} def do_execute(self, method, url, info): return self.do( method, url, files=info['files'], headers=info['headers']) class UpdateMultipartDepositClient(CreateMultipartDepositClient): """Update a multipart deposit client.""" def compute_url(self, collection, *args, deposit_id=None, **kwargs): return '/%s/%s/metadata/' % (collection, deposit_id) def compute_method(self, *args, replace=False, **kwargs): return 'put' if replace else 'post' class PublicApiDepositClient(BaseApiDepositClient): """Public api deposit client.""" - def service_document(self, log=None): + def service_document(self): """Retrieve service document endpoint's information.""" return ServiceDocumentDepositClient(self.config).execute() - def deposit_status(self, collection, deposit_id, log=None): + def deposit_status(self, collection, deposit_id): """Retrieve status information on a deposit.""" return StatusDepositClient(self.config).execute( collection, deposit_id) - def deposit_create(self, collection, slug, archive_path=None, - metadata_path=None, in_progress=False, log=None): + def deposit_create(self, collection, slug, archive=None, + metadata=None, in_progress=False): """Create a new deposit (archive, metadata, both as multipart).""" - if archive_path and not metadata_path: + if archive and not metadata: return CreateArchiveDepositClient(self.config).execute( - collection, archive_path, in_progress, slug) - elif not archive_path and metadata_path: + collection, archive, in_progress, slug) + elif not archive and metadata: return CreateMetadataDepositClient(self.config).execute( - collection, metadata_path, in_progress, slug, + collection, metadata, in_progress, slug, is_archive=False) else: return CreateMultipartDepositClient(self.config).execute( - collection, archive_path, metadata_path, in_progress, + collection, archive, metadata, in_progress, slug) - def deposit_update(self, collection, deposit_id, slug, archive_path=None, - metadata_path=None, in_progress=False, - replace=False, log=None): + def deposit_update(self, collection, deposit_id, slug, archive=None, + metadata=None, in_progress=False, + replace=False): """Update (add/replace) existing deposit (archive, metadata, both).""" - r = self.deposit_status(collection, deposit_id, log=log) + r = self.deposit_status(collection, deposit_id) if 'error' in r: return r status = r['deposit_status'] if status != 'partial': return { 'error': "You can only act on deposit with status 'partial'", 'detail': "The deposit %s has status '%s'" % ( deposit_id, status), 'deposit_status': status, 'deposit_id': deposit_id, } - if archive_path and not metadata_path: + if archive and not metadata: r = UpdateArchiveDepositClient(self.config).execute( - collection, archive_path, in_progress, slug, - deposit_id=deposit_id, replace=replace, log=log) - elif not archive_path and metadata_path: + collection, archive, in_progress, slug, + deposit_id=deposit_id, replace=replace) + elif not archive and metadata: r = UpdateMetadataDepositClient(self.config).execute( - collection, metadata_path, in_progress, slug, - deposit_id=deposit_id, replace=replace, log=log) + collection, metadata, in_progress, slug, + deposit_id=deposit_id, replace=replace) else: r = UpdateMultipartDepositClient(self.config).execute( - collection, archive_path, metadata_path, in_progress, - slug, deposit_id=deposit_id, replace=replace, log=log) + collection, archive, metadata, in_progress, + slug, deposit_id=deposit_id, replace=replace) if 'error' in r: return r - return self.deposit_status(collection, deposit_id, log=log) + return self.deposit_status(collection, deposit_id) diff --git a/swh/deposit/client/cli.py b/swh/deposit/client/cli.py deleted file mode 100755 index ee2ad582..00000000 --- a/swh/deposit/client/cli.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (C) 2018 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - - -"""Script to demonstrate software deposit scenario to -https://deposit.sofwareheritage.org. - -Use: python3 -m swh.deposit.client.cli --help - -Documentation: https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html # noqa - -""" - -import os -import click -import logging -import uuid - - -from . import PublicApiDepositClient - - -class InputError(ValueError): - """Input script error - - """ - pass - - -def generate_slug(prefix='swh-sample'): - """Generate a slug (sample purposes). - - """ - return '%s-%s' % (prefix, uuid.uuid4()) - - -def parse_cli_options(username, password, archive, metadata, - archive_deposit, metadata_deposit, - collection, slug, partial, deposit_id, replace, - url, status): - """Parse the cli options and make sure the combination is acceptable*. - If not, an InputError exception is raised explaining the issue. - - By acceptable, we mean: - - - A multipart deposit (create or update) needs both an - existing software archive and an existing metadata file - - - A binary deposit (create/update) needs an existing - software archive - - - A metadata deposit (create/update) needs an existing - metadata file - - - A deposit update needs a deposit_id to be provided - - This won't prevent all failure cases though. The remaining - errors are already dealt with the underlying api client. - - Raises: - InputError explaining the issue - - Returns: - dict with the following keys: - - 'archive': the software archive to deposit - 'username': username - 'password': associated password - 'metadata': the metadata file to deposit - 'collection': the username's associated client - 'slug': the slug or external id identifying the deposit to make - 'partial': if the deposit is partial or not - 'client': instantiated class - 'url': deposit's server main entry point - 'deposit_type': deposit's type (binary, multipart, metadata) - 'deposit_id': optional deposit identifier - - """ - if status and not deposit_id: - raise InputError("Deposit id must be provided for status check") - - if status and deposit_id: # status is higher priority over deposit - archive_deposit = False - metadata_deposit = False - archive = None - metadata = None - - if archive_deposit and metadata_deposit: - # too many flags use, remove redundant ones (-> multipart deposit) - archive_deposit = False - metadata_deposit = False - - if archive and not os.path.exists(archive): - raise InputError('Software Archive %s must exist!' % archive) - - if archive and not metadata: - metadata = '%s.metadata.xml' % archive - - if metadata_deposit: - archive = None - - if archive_deposit: - metadata = None - - if metadata_deposit and not metadata: - raise InputError( - "Metadata deposit filepath must be provided for metadata deposit") - - if metadata and not os.path.exists(metadata): - raise InputError('Software Archive metadata %s must exist!' % metadata) - - if not status and not archive and not metadata: - raise InputError( - 'Please provide an actionable command. See --help for more ' - 'information.') - - if replace and not deposit_id: - raise InputError( - 'To update an existing deposit, you must provide its id') - - client = PublicApiDepositClient({ - 'url': url, - 'auth': { - 'username': username, - 'password': password - }, - }) - - if not collection: - # retrieve user's collection - sd_content = client.service_document() - if 'error' in sd_content: - raise InputError('Service document retrieval: %s' % ( - sd_content['error'], )) - collection = sd_content['collection'] - - if not slug: - # generate slug - slug = generate_slug() - - return { - 'archive': archive, - 'username': username, - 'password': password, - 'metadata': metadata, - 'collection': collection, - 'slug': slug, - 'partial': partial, - 'client': client, - 'url': url, - 'deposit_id': deposit_id, - 'replace': replace, - } - - -def deposit_status(config, dry_run, log): - log.debug('Status deposit') - client = config['client'] - collection = config['collection'] - deposit_id = config['deposit_id'] - if not dry_run: - r = client.deposit_status(collection, deposit_id, log) - return r - return {} - - -def deposit_create(config, dry_run, log): - """Delegate the actual deposit to the deposit client. - - """ - log.debug('Create deposit') - - client = config['client'] - collection = config['collection'] - archive_path = config['archive'] - metadata_path = config['metadata'] - slug = config['slug'] - in_progress = config['partial'] - if not dry_run: - r = client.deposit_create(collection, slug, archive_path, - metadata_path, in_progress, log) - return r - return {} - - -def deposit_update(config, dry_run, log): - """Delegate the actual deposit to the deposit client. - - """ - log.debug('Update deposit') - - client = config['client'] - collection = config['collection'] - deposit_id = config['deposit_id'] - archive_path = config['archive'] - metadata_path = config['metadata'] - slug = config['slug'] - in_progress = config['partial'] - replace = config['replace'] - if not dry_run: - r = client.deposit_update(collection, deposit_id, slug, archive_path, - metadata_path, in_progress, replace, log) - return r - return {} - - -@click.command() -@click.option('--username', required=1, - help="(Mandatory) User's name") -@click.option('--password', required=1, - help="(Mandatory) User's associated password") -@click.option('--archive', - help='(Optional) Software archive to deposit') -@click.option('--metadata', - help="(Optional) Path to xml metadata file. If not provided, this will use a file named .metadata.xml") # noqa -@click.option('--archive-deposit/--no-archive-deposit', default=False, - help='(Optional) Software archive only deposit') -@click.option('--metadata-deposit/--no-metadata-deposit', default=False, - help='(Optional) Metadata only deposit') -@click.option('--collection', - help="(Optional) User's collection. If not provided, this will be fetched.") # noqa -@click.option('--slug', - help="""(Optional) External system information identifier. If not provided, it will be generated""") # noqa -@click.option('--partial/--no-partial', default=False, - help='(Optional) The deposit will be partial, other deposits will have to take place to finalize it.') # noqa -@click.option('--deposit-id', default=None, - help='(Optional) Update an existing partial deposit with its identifier') # noqa -@click.option('--replace/--no-replace', default=False, - help='(Optional) Update by replacing existing metadata to a deposit') # noqa -@click.option('--url', default='https://deposit.softwareheritage.org/1', - help="(Optional) Deposit server api endpoint. By default, https://deposit.softwareheritage.org/1") # noqa -@click.option('--status/--no-status', default=False, - help="(Optional) Deposit's status") -@click.option('--dry-run/--no-dry-run', default=False, - help='(Optional) No-op deposit') -@click.option('--verbose/--no-verbose', default=False, - help='Verbose mode') -def main(username, password, archive=None, metadata=None, - archive_deposit=False, metadata_deposit=False, - collection=None, slug=None, partial=False, deposit_id=None, - replace=False, status=False, - url='https://deposit.softwareheritage.org/1', dry_run=True, - verbose=False): - """Software Heritage Deposit client - Create (or update partial) -deposit through the command line. - -More documentation can be found at -https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html. - - """ - - log = logging.getLogger('swh-deposit') - log.addHandler(logging.StreamHandler()) - _loglevel = logging.DEBUG if verbose else logging.INFO - log.setLevel(_loglevel) - - if dry_run: - log.info("**DRY RUN**") - - config = {} - - try: - log.debug('Parsing cli options') - config = parse_cli_options( - username, password, archive, metadata, archive_deposit, - metadata_deposit, collection, slug, partial, deposit_id, - replace, url, status) - - except InputError as e: - msg = 'Problem during parsing options: %s' % e - r = { - 'error': msg, - } - log.info(r) - return 1 - - if verbose: - log.info("Parsed configuration: %s" % ( - config, )) - - deposit_id = config['deposit_id'] - - if status and deposit_id: - r = deposit_status(config, dry_run, log) - elif not status and deposit_id: - r = deposit_update(config, dry_run, log) - elif not status and not deposit_id: - r = deposit_create(config, dry_run, log) - - log.info(r) - - -if __name__ == '__main__': - main() diff --git a/swh/deposit/config.py b/swh/deposit/config.py index 1800d67c..eab7deea 100644 --- a/swh/deposit/config.py +++ b/swh/deposit/config.py @@ -1,101 +1,109 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import logging from swh.core.config import SWHConfig from swh.scheduler import get_scheduler # IRIs (Internationalized Resource identifier) sword 2.0 specified EDIT_SE_IRI = 'edit_se_iri' EM_IRI = 'em_iri' CONT_FILE_IRI = 'cont_file_iri' SD_IRI = 'servicedocument' COL_IRI = 'upload' STATE_IRI = 'state_iri' PRIVATE_GET_RAW_CONTENT = 'private-download' PRIVATE_CHECK_DEPOSIT = 'check-deposit' PRIVATE_PUT_DEPOSIT = 'private-update' PRIVATE_GET_DEPOSIT_METADATA = 'private-read' PRIVATE_LIST_DEPOSITS = 'private-deposit-list' ARCHIVE_KEY = 'archive' METADATA_KEY = 'metadata' RAW_METADATA_KEY = 'raw-metadata' ARCHIVE_TYPE = 'archive' METADATA_TYPE = 'metadata' + AUTHORIZED_PLATFORMS = ['development', 'production', 'testing'] DEPOSIT_STATUS_REJECTED = 'rejected' DEPOSIT_STATUS_PARTIAL = 'partial' DEPOSIT_STATUS_DEPOSITED = 'deposited' DEPOSIT_STATUS_VERIFIED = 'verified' DEPOSIT_STATUS_LOAD_SUCCESS = 'done' DEPOSIT_STATUS_LOAD_FAILURE = 'failed' # Revision author for deposit SWH_PERSON = { 'name': 'Software Heritage', 'fullname': 'Software Heritage', 'email': 'robot@softwareheritage.org' } -def setup_django_for(platform): +def setup_django_for(platform=None, config_file=None): """Setup function for command line tools (swh.deposit.create_user) to initialize the needed db access. Note: Do not import any django related module prior to this function call. Otherwise, this will raise an django.core.exceptions.ImproperlyConfigured error message. Args: platform (str): the platform the scheduling is running + config_file (str): Extra configuration file (typically for the + production platform) Raises: ValueError in case of wrong platform inputs. """ - if platform not in AUTHORIZED_PLATFORMS: - raise ValueError('Platform should be one of %s' % AUTHORIZED_PLATFORMS) - - os.environ.setdefault('DJANGO_SETTINGS_MODULE', - 'swh.deposit.settings.%s' % platform) + if platform is not None: + if platform not in AUTHORIZED_PLATFORMS: + raise ValueError('Platform should be one of %s' % + AUTHORIZED_PLATFORMS) + if 'DJANGO_SETTINGS_MODULE' not in os.environ: + os.environ['DJANGO_SETTINGS_MODULE'] = ( + 'swh.deposit.settings.%s' % platform) + + if config_file: + os.environ.setdefault('SWH_CONFIG_FILENAME', config_file) import django django.setup() class SWHDefaultConfig(SWHConfig): """Mixin intended to enrich views with SWH configuration. """ CONFIG_BASE_FILENAME = 'deposit/server' DEFAULT_CONFIG = { 'max_upload_size': ('int', 209715200), 'checks': ('bool', True), 'scheduler': ('dict', { 'cls': 'remote', 'args': { 'url': 'http://localhost:5008/' } }) } ADDITIONAL_CONFIG = {} def __init__(self, **config): super().__init__() self.config = self.parse_config_file( additional_configs=[self.ADDITIONAL_CONFIG]) self.config.update(config) self.log = logging.getLogger('swh.deposit') if self.config['checks']: self.scheduler = get_scheduler(**self.config['scheduler']) diff --git a/swh/deposit/create_user.py b/swh/deposit/create_user.py deleted file mode 100755 index 371668d2..00000000 --- a/swh/deposit/create_user.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (C) 2017 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -import click - -from swh.deposit.config import setup_django_for - - -@click.command( - help='Create a user with some needed information (password, collection)') -@click.option('--platform', default='development', - help='development or production platform') -@click.option('--username', required=True, help="User's name") -@click.option('--password', required=True, help="Desired user's password.") -@click.option('--firstname', default='', help="User's first name") -@click.option('--lastname', default='', help="User's last name") -@click.option('--email', default='', help="User's email") -@click.option('--collection', help="User's collection") -def main(platform, username, password, firstname, lastname, email, collection): - setup_django_for(platform) - - from swh.deposit.models import DepositClient, DepositCollection - - try: - collection = DepositCollection.objects.get(name=collection) - except DepositCollection.DoesNotExist: - raise ValueError( - 'Collection %s does not exist, skipping' % collection) - - # user create/update - try: - user = DepositClient.objects.get(username=username) - print('User %s exists, updating information.' % user) - user.set_password(password) - except DepositClient.DoesNotExist: - print('Create new user %s' % username) - user = DepositClient.objects.create_user( - username=username, - password=password) - - user.collections = [collection.id] - user.first_name = firstname - user.last_name = lastname - user.email = email - user.is_active = True - user.save() - - print('Information registered for user %s' % user) - - -if __name__ == '__main__': - main() diff --git a/swh/deposit/errors.py b/swh/deposit/errors.py index f81601cc..bd51a451 100644 --- a/swh/deposit/errors.py +++ b/swh/deposit/errors.py @@ -1,134 +1,147 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of providing the standard sword errors """ from rest_framework import status from django.shortcuts import render FORBIDDEN = 'forbidden' UNAUTHORIZED = 'unauthorized' NOT_FOUND = 'unknown' BAD_REQUEST = 'bad-request' ERROR_CONTENT = 'error-content' CHECKSUM_MISMATCH = 'checksum-mismatch' MEDIATION_NOT_ALLOWED = 'mediation-not-allowed' METHOD_NOT_ALLOWED = 'method-not-allowed' MAX_UPLOAD_SIZE_EXCEEDED = 'max_upload_size_exceeded' +PARSING_ERROR = 'parsing-error' + + +class ParserError(ValueError): + """Specific parsing error detected when parsing the xml metadata input + + """ + pass ERRORS = { FORBIDDEN: { 'status': status.HTTP_403_FORBIDDEN, 'iri': 'http://purl.org/net/sword/error/ErrorForbidden', 'tag': 'sword:ErrorForbidden', }, UNAUTHORIZED: { 'status': status.HTTP_401_UNAUTHORIZED, 'iri': 'http://purl.org/net/sword/error/ErrorUnauthorized', 'tag': 'sword:ErrorUnauthorized', }, NOT_FOUND: { 'status': status.HTTP_404_NOT_FOUND, 'iri': 'http://purl.org/net/sword/error/ErrorNotFound', 'tag': 'sword:ErrorNotFound', }, ERROR_CONTENT: { 'status': status.HTTP_415_UNSUPPORTED_MEDIA_TYPE, 'iri': 'http://purl.org/net/sword/error/ErrorContent', 'tag': 'sword:ErrorContent', }, CHECKSUM_MISMATCH: { 'status': status.HTTP_412_PRECONDITION_FAILED, 'iri': 'http://purl.org/net/sword/error/ErrorChecksumMismatch', 'tag': 'sword:ErrorChecksumMismatch', }, BAD_REQUEST: { 'status': status.HTTP_400_BAD_REQUEST, 'iri': 'http://purl.org/net/sword/error/ErrorBadRequest', 'tag': 'sword:ErrorBadRequest', }, + PARSING_ERROR: { + 'status': status.HTTP_400_BAD_REQUEST, + 'iri': 'http://purl.org/net/sword/error/ErrorBadRequest', + 'tag': 'sword:ErrorBadRequest', + }, MEDIATION_NOT_ALLOWED: { 'status': status.HTTP_412_PRECONDITION_FAILED, 'iri': 'http://purl.org/net/sword/error/MediationNotAllowed', 'tag': 'sword:MediationNotAllowed', }, METHOD_NOT_ALLOWED: { 'status': status.HTTP_405_METHOD_NOT_ALLOWED, 'iri': 'http://purl.org/net/sword/error/MethodNotAllowed', 'tag': 'sword:MethodNotAllowed', }, MAX_UPLOAD_SIZE_EXCEEDED: { 'status': status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, 'iri': 'http://purl.org/net/sword/error/MaxUploadSizeExceeded', 'tag': 'sword:MaxUploadSizeExceeded', }, } def make_error_dict(key, summary=None, verbose_description=None): """Utility function to factorize error message dictionary. Args: key (str): Error status key referenced in swh.deposit.errors module summary (str/None): Error message clarifying the status verbose_description (str/None): A more verbose description or work around a potential problem. Returns: Dictionary with key 'error' detailing the 'status' and associated 'message' """ return { 'error': { 'key': key, 'summary': summary, 'verboseDescription': verbose_description, }, } def make_error_response_from_dict(req, error): """Utility function to return an http response with error detail. Args: req (Request): original request error (dict): Error described as dict, typically generated from the make_error_dict function. Returns: HttpResponse with detailed error. """ error_information = ERRORS[error['key']] context = error context.update(error_information) return render(req, 'deposit/error.xml', context=error, content_type='application/xml', status=error_information['status']) def make_error_response(req, key, summary=None, verbose_description=None): """Utility function to create an http response with detailed error. Args: req (Request): original request key (str): Error status key referenced in swh.deposit.errors module summary (str): Error message clarifying the status verbose_description (str / None): A more verbose description or work around a potential problem. Returns: Dictionary with key 'error' detailing the 'status' and associated 'message' """ error = make_error_dict(key, summary, verbose_description) return make_error_response_from_dict(req, error['error']) diff --git a/swh/deposit/loader/loader.py b/swh/deposit/loader/loader.py index 95ca7a43..a7ff91f7 100644 --- a/swh/deposit/loader/loader.py +++ b/swh/deposit/loader/loader.py @@ -1,141 +1,141 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import tempfile from swh.model import hashutil from swh.loader.tar import loader from swh.loader.core.loader import BufferedLoader from ..client import PrivateApiDepositClient class DepositLoader(loader.LegacyLocalTarLoader): """Deposit loader implementation. This is a subclass of the :class:TarLoader as the main goal of this class is to first retrieve the deposit's tarball contents as one and its associated metadata. Then provide said tarball to be loaded by the TarLoader. This will: - retrieves the deposit's archive locally - provide the archive to be loaded by the tar loader - clean up the temporary location used to retrieve the archive locally - update the deposit's status accordingly """ CONFIG_BASE_FILENAME = 'loader/deposit' ADDITIONAL_CONFIG = { 'extraction_dir': ('str', '/tmp/swh.deposit.loader/'), } def __init__(self, client=None): super().__init__( logging_class='swh.deposit.loader.loader.DepositLoader') self.deposit_client = client if client else PrivateApiDepositClient() def load(self, *, archive_url, deposit_meta_url, deposit_update_url): return BufferedLoader.load( self, archive_url=archive_url, deposit_meta_url=deposit_meta_url, deposit_update_url=deposit_update_url) def prepare_origin_visit(self, *, deposit_meta_url, **kwargs): self.metadata = self.deposit_client.metadata_get( - deposit_meta_url, log=self.log) + deposit_meta_url) self.origin = self.metadata['origin'] self.visit_date = None def prepare(self, *, archive_url, deposit_meta_url, deposit_update_url): """Prepare the loading by first retrieving the deposit's raw archive content. """ self.deposit_update_url = deposit_update_url self.deposit_client.status_update(deposit_update_url, 'loading') temporary_directory = tempfile.TemporaryDirectory() self.temporary_directory = temporary_directory archive_path = os.path.join(temporary_directory.name, 'archive.zip') archive = self.deposit_client.archive_get( - archive_url, archive_path, log=self.log) + archive_url, archive_path) metadata = self.metadata revision = metadata['revision'] branch_name = metadata['branch_name'] self.origin_metadata = metadata['origin_metadata'] self.prepare_metadata() super().prepare(tar_path=archive, origin=self.origin, revision=revision, branch_name=branch_name) def store_metadata(self): """Storing the origin_metadata during the load processus. Provider_id and tool_id are resolved during the prepare() method. """ origin_id = self.origin_id visit_date = self.visit_date provider_id = self.origin_metadata['provider']['provider_id'] tool_id = self.origin_metadata['tool']['tool_id'] metadata = self.origin_metadata['metadata'] try: self.send_origin_metadata(origin_id, visit_date, provider_id, tool_id, metadata) except Exception: self.log.exception('Problem when storing origin_metadata') raise def post_load(self, success=True): """Updating the deposit's status according to its loading status. If not successful, we update its status to 'failed'. Otherwise, we update its status to 'done' and pass along its associated revision. """ try: if not success: self.deposit_client.status_update(self.deposit_update_url, status='failed') return revisions = self.objects['revision'] # Retrieve the revision [rev_id] = revisions.keys() rev = revisions[rev_id] if rev_id: rev_id = hashutil.hash_to_hex(rev_id) dir_id = rev['directory'] if dir_id: dir_id = hashutil.hash_to_hex(dir_id) # update the deposit's status to success with its # revision-id and directory-id self.deposit_client.status_update( self.deposit_update_url, status='done', revision_id=rev_id, directory_id=dir_id, origin_url=self.origin['url']) except Exception: self.log.exception( 'Problem when trying to update the deposit\'s status') def cleanup(self): """Clean up temporary directory where we retrieved the tarball. """ super().cleanup() self.temporary_directory.cleanup() diff --git a/swh/manage.py b/swh/deposit/manage.py similarity index 96% rename from swh/manage.py rename to swh/deposit/manage.py index 5d758e80..2be65708 100755 --- a/swh/manage.py +++ b/swh/deposit/manage.py @@ -1,53 +1,53 @@ #!/usr/bin/env python3 -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import sys from swh.core import config DEFAULT_CONFIG = { 'port': ('int', 5006), 'host': ('str', '127.0.0.1'), } if __name__ == "__main__": settings_file = 'development' if sys.argv[1] == 'runserver': # override the default host:port for the 'runserver' task conf = config.load_named_config('deposit/server', default_conf=DEFAULT_CONFIG) extra_cmd = ['%s:%s' % (conf['host'], conf['port'])] cmd = sys.argv + extra_cmd elif sys.argv[1] == 'test': # override the default settings file to read in testing mode settings_file = 'testing' cmd = sys.argv else: # otherwise, do nothing cmd = sys.argv os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'swh.deposit.settings.%s' % settings_file) try: from django.core.management import execute_from_command_line except ImportError: # The above import may fail for some other reason. Ensure that the # issue is really that Django is missing to avoid masking other # exceptions on Python 2. try: import django # noqa except ImportError: raise ImportError( "Couldn't import Django. Are you sure it's installed and " "available on your PYTHONPATH environment variable? Did you " "forget to activate a virtual environment?" ) raise execute_from_command_line(cmd) diff --git a/swh/deposit/migrations/0015_depositrequest_typemigration.py b/swh/deposit/migrations/0015_depositrequest_typemigration.py new file mode 100644 index 00000000..046c84dd --- /dev/null +++ b/swh/deposit/migrations/0015_depositrequest_typemigration.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.18 on 2019-04-12 16:40 +from __future__ import unicode_literals + +from django.db import migrations, models + + +def populate_deposit_type2(apps, schema_editor): + # We can't import the DepositRequest model directly as it may be a newer + # version than this migration expects. We use the historical version. + DepositRequest = apps.get_model('deposit', 'DepositRequest') + + for deposit in DepositRequest.objects.all(): + deposit.type2 = deposit.type.name + deposit.save() + + +class Migration(migrations.Migration): + + dependencies = [ + ('deposit', '0014_auto_20180720_1221'), + ] + + operations = [ + migrations.AddField( + model_name='depositrequest', + name='type2', + field=models.CharField(choices=[('archive', 'archive'), ('metadata', 'metadata')], max_length=8, null=True), + ), + migrations.RunPython(populate_deposit_type2), + migrations.RemoveField( + model_name='depositrequest', + name='type', + ), + migrations.RenameField( + model_name='depositrequest', + old_name='type2', + new_name='type', + ), + migrations.DeleteModel( + name='DepositRequestType', + ), + ] diff --git a/swh/deposit/models.py b/swh/deposit/models.py index ba6d9320..f169e447 100644 --- a/swh/deposit/models.py +++ b/swh/deposit/models.py @@ -1,221 +1,212 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # Generated from: # cd swh_deposit && \ # python3 -m manage inspectdb from django.contrib.postgres.fields import JSONField, ArrayField from django.contrib.auth.models import User, UserManager from django.db import models from django.utils.timezone import now from .config import ( DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE, - DEPOSIT_STATUS_REJECTED + DEPOSIT_STATUS_REJECTED, ARCHIVE_TYPE, METADATA_TYPE ) class Dbversion(models.Model): """Db version """ version = models.IntegerField(primary_key=True) release = models.DateTimeField(default=now, null=True) description = models.TextField(blank=True, null=True) class Meta: db_table = 'dbversion' def __str__(self): return str({ 'version': self.version, 'release': self.release, 'description': self.description }) """Possible status""" DEPOSIT_STATUS = [ (DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_PARTIAL), ('expired', 'expired'), (DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_DEPOSITED), (DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_VERIFIED), (DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_REJECTED), ('loading', 'loading'), (DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_SUCCESS), (DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_LOAD_FAILURE), ] """Possible status and the detailed meaning.""" DEPOSIT_STATUS_DETAIL = { DEPOSIT_STATUS_PARTIAL: 'Deposit is partially received. To finalize it, ' 'In-Progress header should be false', 'expired': 'Deposit has been there too long and is now ' 'deemed ready to be garbage collected', DEPOSIT_STATUS_DEPOSITED: 'Deposit is ready for additional checks ' '(tarball ok, metadata, etc...)', DEPOSIT_STATUS_VERIFIED: 'Deposit is fully received, checked, and ' 'ready for loading', DEPOSIT_STATUS_REJECTED: 'Deposit failed the checks', 'loading': "Loading is ongoing on swh's side", DEPOSIT_STATUS_LOAD_SUCCESS: 'The deposit has been successfully ' 'loaded into the Software Heritage archive', DEPOSIT_STATUS_LOAD_FAILURE: 'The deposit loading into the ' 'Software Heritage archive failed', } class DepositClient(User): """Deposit client """ collections = ArrayField(models.IntegerField(), null=True) objects = UserManager() provider_url = models.TextField(null=False) domain = models.TextField(null=False) class Meta: db_table = 'deposit_client' def __str__(self): return str({ 'id': self.id, 'collections': self.collections, 'username': super().username, 'domain': self.domain, 'provider_url': self.provider_url, }) class Deposit(models.Model): """Deposit reception table """ id = models.BigAutoField(primary_key=True) # First deposit reception date reception_date = models.DateTimeField(auto_now_add=True) # Date when the deposit is deemed complete and ready for loading complete_date = models.DateTimeField(null=True) # collection concerned by the deposit collection = models.ForeignKey( 'DepositCollection', models.DO_NOTHING) # Deposit's external identifier external_id = models.TextField() # Deposit client client = models.ForeignKey('DepositClient', models.DO_NOTHING) # SWH's loading result identifier swh_id = models.TextField(blank=True, null=True) swh_id_context = models.TextField(blank=True, null=True) swh_anchor_id = models.TextField(blank=True, null=True) swh_anchor_id_context = models.TextField(blank=True, null=True) # Deposit's status regarding loading status = models.TextField( choices=DEPOSIT_STATUS, default=DEPOSIT_STATUS_PARTIAL) status_detail = JSONField(null=True) # deposit can have one parent parent = models.ForeignKey('self', null=True) class Meta: db_table = 'deposit' def __str__(self): d = { 'id': self.id, 'reception_date': self.reception_date, 'collection': self.collection.name, 'external_id': self.external_id, 'client': self.client.username, 'status': self.status, } if self.status in (DEPOSIT_STATUS_REJECTED): d['status_detail'] = self.status_detail return str(d) -class DepositRequestType(models.Model): - """Deposit request type made by clients (either archive or metadata) - - """ - id = models.BigAutoField(primary_key=True) - name = models.TextField() - - class Meta: - db_table = 'deposit_request_type' - - def __str__(self): - return str({'id': self.id, 'name': self.name}) - - def client_directory_path(instance, filename): """Callable to upload archive in MEDIA_ROOT/user_/ Args: instance (DepositRequest): DepositRequest concerned by the upload filename (str): Filename of the uploaded file Returns: A path to be prefixed by the MEDIA_ROOT to access physically to the file uploaded. """ return 'client_{0}/{1}'.format(instance.deposit.client.id, filename) +REQUEST_TYPES = [(ARCHIVE_TYPE, ARCHIVE_TYPE), + (METADATA_TYPE, METADATA_TYPE)] + + class DepositRequest(models.Model): """Deposit request associated to one deposit. """ id = models.BigAutoField(primary_key=True) # Deposit concerned by the request deposit = models.ForeignKey(Deposit, models.DO_NOTHING) date = models.DateTimeField(auto_now_add=True) # Deposit request information on the data to inject # this can be null when type is 'archive' metadata = JSONField(null=True) raw_metadata = models.TextField(null=True) # this can be null when type is 'metadata' archive = models.FileField(null=True, upload_to=client_directory_path) - type = models.ForeignKey( - 'DepositRequestType', models.DO_NOTHING) + type = models.CharField(max_length=8, + choices=REQUEST_TYPES, + null=True) class Meta: db_table = 'deposit_request' def __str__(self): meta = None if self.metadata: from json import dumps meta = dumps(self.metadata) archive_name = None if self.archive: archive_name = self.archive.name return str({ 'id': self.id, 'deposit': self.deposit, 'metadata': meta, 'archive': archive_name }) class DepositCollection(models.Model): id = models.BigAutoField(primary_key=True) # Human readable name for the collection type e.g HAL, arXiv, etc... name = models.TextField() class Meta: db_table = 'deposit_collection' def __str__(self): return str({'id': self.id, 'name': self.name}) diff --git a/swh/deposit/parsers.py b/swh/deposit/parsers.py index 52fcc10b..70f328fd 100644 --- a/swh/deposit/parsers.py +++ b/swh/deposit/parsers.py @@ -1,83 +1,92 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of defining parsers with SWORD 2.0 supported mediatypes. """ import xmltodict from django.conf import settings from rest_framework.parsers import BaseParser from rest_framework.parsers import FileUploadParser from rest_framework.parsers import MultiPartParser +from xml.parsers.expat import ExpatError + +from swh.deposit.errors import ParserError class SWHFileUploadZipParser(FileUploadParser): """File upload parser limited to zip archive. """ media_type = 'application/zip' class SWHFileUploadTarParser(FileUploadParser): """File upload parser limited to tarball (tar, tar.gz, tar.*) archives. """ media_type = 'application/x-tar' class SWHXMLParser(BaseParser): """ XML parser. """ media_type = 'application/xml' def parse(self, stream, media_type=None, parser_context=None): """ Parses the incoming bytestream as XML and returns the resulting data. """ parser_context = parser_context or {} encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET) data = xmltodict.parse(stream, encoding=encoding, process_namespaces=False) if 'entry' in data: data = data['entry'] return data class SWHAtomEntryParser(SWHXMLParser): """Atom entry parser limited to specific mediatype """ media_type = 'application/atom+xml;type=entry' def parse(self, stream, media_type=None, parser_context=None): # We do not actually want to parse the stream yet # because we want to keep the raw data as well # this is done later in the atom entry call # (cf. swh.deposit.api.common.SWHBaseDeposit._atom_entry) return stream class SWHMultiPartParser(MultiPartParser): """Multipart parser limited to a subset of mediatypes. """ media_type = 'multipart/*; *' def parse_xml(raw_content): """Parse xml body. Args: raw_content (bytes): The content to parse + Raises: + ParserError in case of a malformed xml + Returns: content parsed as dict. """ - return SWHXMLParser().parse(raw_content) + try: + return SWHXMLParser().parse(raw_content) + except ExpatError as e: + raise ParserError(str(e)) diff --git a/swh/deposit/settings/production.py b/swh/deposit/settings/production.py index bf848528..dea38a8d 100644 --- a/swh/deposit/settings/production.py +++ b/swh/deposit/settings/production.py @@ -1,107 +1,113 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from .common import * # noqa from .common import ALLOWED_HOSTS from swh.core import config ALLOWED_HOSTS += ['deposit.softwareheritage.org'] # Setup support for proxy headers USE_X_FORWARDED_HOST = True SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https') DEBUG = False # Database # https://docs.djangoproject.com/en/1.10/ref/settings/#databases # https://docs.djangoproject.com/en/1.10/ref/settings/#std:setting-DATABASES # https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/#databases # Retrieve the deposit's configuration file # and check the required setup is ok # If not raise an error explaining the errors config_file = os.environ.get('SWH_CONFIG_FILENAME') +if not config_file: + raise ValueError('Production: SWH_CONFIG_FILENANE must be set to the' + ' configuration file needed!') + if not os.path.exists(config_file): raise ValueError('Production: configuration file %s does not exist!' % ( config_file, )) conf = config.load_named_config(config_file) if not conf: raise ValueError( 'Production: configuration %s does not exist.' % ( config_file, )) for key in ('scheduler', 'private'): if not conf.get(key): raise ValueError( "Production: invalid configuration; missing %s config entry." % ( key, )) +ALLOWED_HOSTS += conf.get('allowed_hosts', []) + private_conf = conf['private'] SECRET_KEY = private_conf['secret_key'] # https://docs.djangoproject.com/en/1.10/ref/settings/#logging LOGGING = { 'version': 1, 'disable_existing_loggers': False, 'formatters': { 'standard': { 'format': "[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s", # noqa 'datefmt': "%d/%b/%Y %H:%M:%S" }, }, 'handlers': { 'console': { 'level': 'INFO', 'class': 'logging.StreamHandler', 'formatter': 'standard' }, }, 'loggers': { 'django': { 'handlers': ['console'], 'level': 'INFO', 'propagate': True, }, }, } # database db_conf = private_conf.get('db', {'name': 'unset'}) db = { 'ENGINE': 'django.db.backends.postgresql', 'NAME': db_conf['name'], } db_user = db_conf.get('user') if db_user: db['USER'] = db_user db_pass = db_conf.get('password') if db_pass: db['PASSWORD'] = db_pass db_host = db_conf.get('host') if db_host: db['HOST'] = db_host db_port = db_conf.get('port') if db_port: db['PORT'] = db_port # https://docs.djangoproject.com/en/1.10/ref/settings/#databases DATABASES = { 'default': db, } # Upload user directory # https://docs.djangoproject.com/en/1.11/ref/settings/#std:setting-MEDIA_ROOT MEDIA_ROOT = private_conf.get('media_root') diff --git a/swh/deposit/signals.py b/swh/deposit/signals.py index 83d893a0..13a6739e 100644 --- a/swh/deposit/signals.py +++ b/swh/deposit/signals.py @@ -1,83 +1,83 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of defining some uncoupled actions on deposit. Typically, checking that the archives deposited are ok are not directly testing in the request/answer to avoid too long computations. So this is done in the deposit_on_status_ready_for_check callback. """ from django.db.models.signals import post_save from django.dispatch import receiver from .models import Deposit from .config import SWHDefaultConfig, DEPOSIT_STATUS_VERIFIED from .config import DEPOSIT_STATUS_DEPOSITED @receiver(post_save, sender=Deposit) def post_deposit_save(sender, instance, created, raw, using, update_fields, **kwargs): """When a deposit is saved, check for the deposit's status change and schedule actions accordingly. When the status passes to deposited, schedule checks. When the status pass to ready, schedule loading. Otherwise, do nothing. Args: sender (Deposit): The model class instance (Deposit): The actual instance being saved created (bool): True if a new record was created raw (bool): True if the model is saved exactly as presented (i.e. when loading a fixture). One should not query/modify other records in the database as the database might not be in a consistent state yet using: The database alias being used update_fields: The set of fields to update as passed to Model.save(), or None if update_fields wasn’t passed to save() """ default_config = SWHDefaultConfig() if not default_config.config['checks']: return if instance.status not in {DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_VERIFIED}: return - from django.core.urlresolvers import reverse + from django.urls import reverse from swh.scheduler.utils import create_oneshot_task_dict args = [instance.collection.name, instance.id] if instance.status == DEPOSIT_STATUS_DEPOSITED: # schedule archive check from swh.deposit.config import PRIVATE_CHECK_DEPOSIT check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args) task = create_oneshot_task_dict( 'swh-deposit-archive-checks', deposit_check_url=check_url) else: # instance.status == DEPOSIT_STATUS_VERIFIED: # schedule loading from swh.deposit.config import PRIVATE_GET_RAW_CONTENT from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA from swh.deposit.config import PRIVATE_PUT_DEPOSIT archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) task = create_oneshot_task_dict( 'swh-deposit-archive-loading', archive_url=archive_url, deposit_meta_url=meta_url, deposit_update_url=update_url) default_config.scheduler.create_tasks([task]) diff --git a/swh/deposit/templates/deposit/status.xml b/swh/deposit/templates/deposit/status.xml index a8378ef7..4e6ecf7f 100644 --- a/swh/deposit/templates/deposit/status.xml +++ b/swh/deposit/templates/deposit/status.xml @@ -1,11 +1,12 @@ {{ deposit_id }} {{ status }} {{ status_detail }} {% if swh_id is not None %}{{ swh_id }}{% endif %} {% if swh_id_context is not None %}{{ swh_id_context }}{% endif %} {% if swh_anchor_id is not None %}{{ swh_anchor_id }}{% endif %} {% if swh_anchor_id_context is not None %}{{ swh_anchor_id_context }}{% endif %} + {% if external_id is not None %}{{ external_id }}{% endif %} diff --git a/swh/deposit/tests/api/test_common.py b/swh/deposit/tests/api/test_common.py index 74bc0b8b..74479973 100644 --- a/swh/deposit/tests/api/test_common.py +++ b/swh/deposit/tests/api/test_common.py @@ -1,39 +1,39 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from django.core.urlresolvers import reverse +from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase from ..common import BasicTestCase, WithAuthTestCase class IndexNoAuthCase(APITestCase, BasicTestCase): """Access to main entry point is ok without authentication """ def test_get_home_is_ok(self): """Without authentication, endpoint refuses access with 401 response """ url = reverse('home') response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIn(b'The Software Heritage Deposit', response.content) class IndexWithAuthCase(WithAuthTestCase, APITestCase, BasicTestCase): """Access to main entry point is ok with authentication as well """ def test_get_home_is_ok_2(self): """Without authentication, endpoint refuses access with 401 response """ url = reverse('home') response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIn(b'The Software Heritage Deposit', response.content) diff --git a/swh/deposit/tests/api/test_deposit.py b/swh/deposit/tests/api/test_deposit.py index fcfac4e9..eb984002 100644 --- a/swh/deposit/tests/api/test_deposit.py +++ b/swh/deposit/tests/api/test_deposit.py @@ -1,160 +1,160 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib -from django.core.urlresolvers import reverse +from django.urls import reverse from io import BytesIO from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.config import COL_IRI, EDIT_SE_IRI, DEPOSIT_STATUS_REJECTED from swh.deposit.config import DEPOSIT_STATUS_PARTIAL from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS from swh.deposit.config import DEPOSIT_STATUS_LOAD_FAILURE from swh.deposit.models import Deposit, DepositClient, DepositCollection from swh.deposit.parsers import parse_xml from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine class DepositNoAuthCase(APITestCase, BasicTestCase): """Deposit access are protected with basic authentication. """ def test_post_will_fail_with_401(self): """Without authentication, endpoint refuses access with 401 response """ url = reverse(COL_IRI, args=[self.collection.name]) # when response = self.client.post(url) # then self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) class DepositFailuresTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): """Deposit access are protected with basic authentication. """ def setUp(self): super().setUp() # Add another user _collection2 = DepositCollection(name='some') _collection2.save() _user = DepositClient.objects.create_user(username='user', password='user') _user.collections = [_collection2.id] self.collection2 = _collection2 def test_access_to_another_user_collection_is_forbidden(self): """Access to another user collection should return a 403 """ url = reverse(COL_IRI, args=[self.collection2.name]) response = self.client.post(url) self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) self.assertRegex(response.content.decode('utf-8'), 'Client hal cannot access collection %s' % ( self.collection2.name, )) def test_delete_on_col_iri_not_supported(self): """Delete on col iri should return a 405 response """ url = reverse(COL_IRI, args=[self.collection.name]) response = self.client.delete(url) self.assertEqual(response.status_code, status.HTTP_405_METHOD_NOT_ALLOWED) self.assertRegex(response.content.decode('utf-8'), 'DELETE method is not supported on this endpoint') def create_deposit_with_rejection_status(self): url = reverse(COL_IRI, args=[self.collection.name]) data = b'some data which is clearly not a zip file' md5sum = hashlib.md5(data).hexdigest() external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=data, # + headers CONTENT_LENGTH=len(data), # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, HTTP_CONTENT_MD5=md5sum, HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) actual_state = response_content['deposit_status'] self.assertEqual(actual_state, DEPOSIT_STATUS_REJECTED) def test_act_on_deposit_rejected_is_not_permitted(self): deposit_id = self.create_deposit_with_status(DEPOSIT_STATUS_REJECTED) deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_REJECTED response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data1, HTTP_SLUG='external-id') self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertRegex( response.content.decode('utf-8'), "You can only act on deposit with status '%s'" % ( DEPOSIT_STATUS_PARTIAL, )) def test_add_deposit_with_parent(self): # given multiple deposit already loaded deposit_id = self.create_deposit_with_status( status=DEPOSIT_STATUS_LOAD_SUCCESS, external_id='some-external-id') deposit1 = Deposit.objects.get(pk=deposit_id) self.assertIsNotNone(deposit1) self.assertEqual(deposit1.external_id, 'some-external-id') self.assertEqual(deposit1.status, DEPOSIT_STATUS_LOAD_SUCCESS) deposit_id2 = self.create_deposit_with_status( status=DEPOSIT_STATUS_LOAD_SUCCESS, external_id='some-external-id') deposit2 = Deposit.objects.get(pk=deposit_id2) self.assertIsNotNone(deposit2) self.assertEqual(deposit2.external_id, 'some-external-id') self.assertEqual(deposit2.status, DEPOSIT_STATUS_LOAD_SUCCESS) deposit_id3 = self.create_deposit_with_status( status=DEPOSIT_STATUS_LOAD_FAILURE, external_id='some-external-id') deposit3 = Deposit.objects.get(pk=deposit_id3) self.assertIsNotNone(deposit3) self.assertEqual(deposit3.external_id, 'some-external-id') self.assertEqual(deposit3.status, DEPOSIT_STATUS_LOAD_FAILURE) # when deposit_id3 = self.create_simple_deposit_partial( external_id='some-external-id') # then deposit4 = Deposit.objects.get(pk=deposit_id3) self.assertIsNotNone(deposit4) self.assertEqual(deposit4.external_id, 'some-external-id') self.assertEqual(deposit4.status, DEPOSIT_STATUS_PARTIAL) self.assertEqual(deposit4.parent, deposit2) diff --git a/swh/deposit/tests/api/test_deposit_atom.py b/swh/deposit/tests/api/test_deposit_atom.py index 2f50f050..b04da6d6 100644 --- a/swh/deposit/tests/api/test_deposit_atom.py +++ b/swh/deposit/tests/api/test_deposit_atom.py @@ -1,528 +1,473 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from django.core.urlresolvers import reverse +from django.urls import reverse from io import BytesIO from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED from swh.deposit.models import Deposit, DepositRequest from swh.deposit.parsers import parse_xml from ..common import BasicTestCase, WithAuthTestCase class DepositAtomEntryTestCase(APITestCase, WithAuthTestCase, BasicTestCase): """Try and post atom entry deposit. """ def setUp(self): super().setUp() self.atom_entry_data0 = b""" Awesome Compiler hal urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a %s 2017-10-07T15:17:08Z some awesome author something awesome-compiler This is an awesome compiler destined to awesomely compile stuff and other stuff compiler,programming,language 2005-10-07T17:17:08Z 2005-10-07T17:17:08Z release note related link Awesome https://hoster.org/awesome-compiler GNU/Linux 0.0.1 running all """ self.atom_entry_data1 = b""" hal urn:uuid:2225c695-cfb8-4ebb-aaaa-80da344efa6a 2017-10-07T15:17:08Z some awesome author something awesome-compiler This is an awesome compiler destined to awesomely compile stuff and other stuff compiler,programming,language 2005-10-07T17:17:08Z 2005-10-07T17:17:08Z release note related link Awesome https://hoster.org/awesome-compiler GNU/Linux 0.0.1 running all """ - self.atom_entry_data2 = b""" - - %s -""" - - self.atom_entry_data_empty_body = b""" -""" - - self.atom_entry_data3 = b""" - - something -""" - - self.atom_entry_data_atom_only = b""" - - Awesome Compiler - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 1785io25c695 - 2017-10-07T15:17:08Z - some awesome author - """ - - self.atom_entry_data_codemeta = b""" - - Awesome Compiler - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 1785io25c695 - 1785io25c695 - origin url - other identifier, DOI, ARK - Domain - - description - key-word 1 - key-word 2 - creation date - publication date - comment - - article name - article id - - - Collaboration/Projet - project name - id - - see also - Sponsor A - Sponsor B - Platform/OS - dependencies - Version - active - - license - url spdx - - .Net Framework 3.0 - Python2.3 - - author1 - Inria - UPMC - - - author2 - Inria - UPMC - - http://code.com - language 1 - language 2 - http://issuetracker.com - """ # noqa - - self.atom_entry_data_dc_codemeta = b""" - - - - %s - hal-01587361 - https://hal.inria.fr/hal-01587361 - https://hal.inria.fr/hal-01587361/document - https://hal.inria.fr/hal-01587361/file/AffectationRO-v1.0.0.zip - doi:10.5281/zenodo.438684 - The assignment problem - AffectationRO - Gruenpeter, Morane - [INFO] Computer Science [cs] - [INFO.INFO-RO] Computer Science [cs]/Operations Research [cs.RO] - SOFTWARE - Project in OR: The assignment problemA java implementation for the assignment problem first release - description fr - 2015-06-01 - 2017-10-19 - en - - - url stable - Version sur hal - Version entre par lutilisateur - Mots-cls - Commentaire - Rfrence interne - - Collaboration/Projet - nom du projet - id - - Voir aussi - Financement - Projet ANR - Projet Europen - Platform/OS - Dpendances - Etat du dveloppement - - license - url spdx - - Outils de dveloppement- outil no1 - Outils de dveloppement- outil no2 - http://code.com - language 1 - language 2 - """ # noqa - - self.atom_entry_tei = b"""HAL TEI export of hal-01587083CCSDDistributed under a Creative Commons Attribution 4.0 International License

HAL API platform

questionnaire software metadataMoraneGruenpeter7de56c632362954fa84172cad80afe4einria.fr1556733MoraneGruenpeterf85a43a5fb4a2e0778a77e017f28c8fdgmail.com2017-09-29 11:21:322017-10-03 17:20:132017-10-03 17:20:132017-09-292017-09-29contributorMoraneGruenpeterf85a43a5fb4a2e0778a77e017f28c8fdgmail.comCCSDhal-01587083https://hal.inria.fr/hal-01587083gruenpeter:hal-0158708320172017questionnaire software metadataMoraneGruenpeter7de56c632362954fa84172cad80afe4einria.fr1556733EnglishComputer Science [cs]SoftwareIRILLInitiative pour la Recherche et l'Innovation sur le Logiciel Libre
https://www.irill.org/
Universite Pierre et Marie Curie - Paris 6UPMC
4 place Jussieu - 75005 Paris
http://www.upmc.fr/
Institut National de Recherche en Informatique et en AutomatiqueInria
Domaine de VoluceauRocquencourt - BP 10578153 Le Chesnay Cedex
http://www.inria.fr/en/
Universite Paris Diderot - Paris 7UPD7
5 rue Thomas-Mann - 75205 Paris cedex 13
http://www.univ-paris-diderot.fr
""" # noqa - self.atom_entry_data_badly_formatted = b""" """ self.atom_error_with_decimal = b""" Composing a Web of Audio Applications hal hal-01243065 hal-01243065 https://hal-test.archives-ouvertes.fr/hal-01243065 test DSP programming,Web,Composability,Faust 2017-05-03T16:08:47+02:00 The Web offers a great opportunity to share, deploy and use programs without installation difficulties. In this article we explore the idea of freely combining/composing real-time audio applications deployed on the Web using Faust audio DSP language. 1 10.4 phpstorm stable linux php python C GNU General Public License v3.0 only CeCILL Free Software License Agreement v1.1 HAL hal@ccsd.cnrs.fr Someone Nice someone@nice.fr FFJ """ # noqa - def test_post_deposit_atom_entry_serialization_error(self): + def test_post_deposit_atom_201_even_with_decimal(self): """Posting an initial atom entry should return 201 with deposit receipt """ # given # when response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=self.atom_error_with_decimal, HTTP_SLUG='external-id', HTTP_IN_PROGRESS='false') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) dr = DepositRequest.objects.get(deposit=deposit) self.assertIsNotNone(dr.metadata) sw_version = dr.metadata.get('codemeta:softwareVersion') self.assertEqual(sw_version, '10.4') - def test_post_deposit_atom_empty_body_request(self): + def test_post_deposit_atom_400_with_empty_body(self): """Posting empty body request should return a 400 response """ + atom_entry_data_empty_body = b""" +""" + response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', - data=self.atom_entry_data_empty_body) + data=atom_entry_data_empty_body) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - def test_post_deposit_atom_badly_formatted_is_a_bad_request(self): + def test_post_deposit_atom_400_badly_formatted_atom(self): """Posting a badly formatted atom should return a 400 response """ response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data_badly_formatted) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - def test_post_deposit_atom_without_slug_header_is_bad_request(self): + def test_post_deposit_atom_400_with_parsing_error(self): + """Posting parsing error prone atom should return 400 + + """ + atom_entry_data_parsing_error_prone = b""" + + Composing a Web of Audio Applications + + +""" + response = self.client.post( + reverse(COL_IRI, args=[self.collection.name]), + content_type='application/atom+xml;type=entry', + data=atom_entry_data_parsing_error_prone) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + def test_post_deposit_atom_400_without_slug_header(self): """Posting an atom entry without a slug header should return a 400 """ url = reverse(COL_IRI, args=[self.collection.name]) # when response = self.client.post( url, content_type='application/atom+xml;type=entry', data=self.atom_entry_data0, # + headers HTTP_IN_PROGRESS='false') self.assertIn(b'Missing SLUG header', response.content) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - def test_post_deposit_atom_unknown_collection(self): + def test_post_deposit_atom_404_unknown_collection(self): """Posting an atom entry to an unknown collection should return a 404 """ + atom_entry_data3 = b""" + + something +""" + response = self.client.post( reverse(COL_IRI, args=['unknown-one']), content_type='application/atom+xml;type=entry', - data=self.atom_entry_data3, + data=atom_entry_data3, HTTP_SLUG='something') self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) def test_post_deposit_atom_entry_initial(self): """Posting an initial atom entry should return 201 with deposit receipt """ # given external_id = 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a' with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) atom_entry_data = self.atom_entry_data0 % external_id.encode('utf-8') # when response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=atom_entry_data, HTTP_SLUG='external-id', HTTP_IN_PROGRESS='false') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.client, self.user) # one associated request to a deposit deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertIsNotNone(deposit_request.metadata) self.assertEqual( deposit_request.raw_metadata, atom_entry_data.decode('utf-8')) self.assertFalse(bool(deposit_request.archive)) def test_post_deposit_atom_entry_with_codemeta(self): """Posting an initial atom entry should return 201 with deposit receipt """ # given external_id = 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a' with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) - atom_entry_data = self.atom_entry_data_dc_codemeta % ( - external_id.encode('utf-8'), ) + atom_entry_data = b""" + + + + %s + hal-01587361 + https://hal.inria.fr/hal-01587361 + https://hal.inria.fr/hal-01587361/document + https://hal.inria.fr/hal-01587361/file/AffectationRO-v1.0.0.zip + doi:10.5281/zenodo.438684 + The assignment problem + AffectationRO + Gruenpeter, Morane + [INFO] Computer Science [cs] + [INFO.INFO-RO] Computer Science [cs]/Operations Research [cs.RO] + SOFTWARE + Project in OR: The assignment problemA java implementation for the assignment problem first release + description fr + 2015-06-01 + 2017-10-19 + en + + + url stable + Version sur hal + Version entre par lutilisateur + Mots-cls + Commentaire + Rfrence interne + + Collaboration/Projet + nom du projet + id + + Voir aussi + Financement + Projet ANR + Projet Europen + Platform/OS + Dpendances + Etat du dveloppement + + license + url spdx + + Outils de dveloppement- outil no1 + Outils de dveloppement- outil no2 + http://code.com + language 1 + language 2 + """ % external_id.encode('utf-8') # noqa # when response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=atom_entry_data, HTTP_SLUG='external-id', HTTP_IN_PROGRESS='false') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.client, self.user) # one associated request to a deposit deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertIsNotNone(deposit_request.metadata) self.assertEqual( deposit_request.raw_metadata, atom_entry_data.decode('utf-8')) self.assertFalse(bool(deposit_request.archive)) def test_post_deposit_atom_entry_tei(self): """Posting initial atom entry as TEI should return 201 with receipt """ # given external_id = 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a' with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) - atom_entry_data = self.atom_entry_tei + atom_entry_data = b"""HAL TEI export of hal-01587083CCSDDistributed under a Creative Commons Attribution 4.0 International License

HAL API platform

questionnaire software metadataMoraneGruenpeter7de56c632362954fa84172cad80afe4einria.fr1556733MoraneGruenpeterf85a43a5fb4a2e0778a77e017f28c8fdgmail.com2017-09-29 11:21:322017-10-03 17:20:132017-10-03 17:20:132017-09-292017-09-29contributorMoraneGruenpeterf85a43a5fb4a2e0778a77e017f28c8fdgmail.comCCSDhal-01587083https://hal.inria.fr/hal-01587083gruenpeter:hal-0158708320172017questionnaire software metadataMoraneGruenpeter7de56c632362954fa84172cad80afe4einria.fr1556733EnglishComputer Science [cs]SoftwareIRILLInitiative pour la Recherche et l'Innovation sur le Logiciel Libre
https://www.irill.org/
Universite Pierre et Marie Curie - Paris 6UPMC
4 place Jussieu - 75005 Paris
http://www.upmc.fr/
Institut National de Recherche en Informatique et en AutomatiqueInria
Domaine de VoluceauRocquencourt - BP 10578153 Le Chesnay Cedex
http://www.inria.fr/en/
Universite Paris Diderot - Paris 7UPD7
5 rue Thomas-Mann - 75205 Paris cedex 13
http://www.univ-paris-diderot.fr
""" # noqa # when response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=atom_entry_data, HTTP_SLUG=external_id, HTTP_IN_PROGRESS='false') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.client, self.user) # one associated request to a deposit deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertIsNotNone(deposit_request.metadata) self.assertEqual( deposit_request.raw_metadata, atom_entry_data.decode('utf-8')) self.assertFalse(bool(deposit_request.archive)) def test_post_deposit_atom_entry_multiple_steps(self): """After initial deposit, updating a deposit should return a 201 """ # given external_id = 'urn:uuid:2225c695-cfb8-4ebb-aaaa-80da344efa6a' with self.assertRaises(Deposit.DoesNotExist): deposit = Deposit.objects.get(external_id=external_id) # when response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data1, HTTP_IN_PROGRESS='True', HTTP_SLUG=external_id) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = int(response_content['deposit_id']) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.status, 'partial') self.assertEqual(deposit.client, self.user) # one associated request to a deposit deposit_requests = DepositRequest.objects.filter(deposit=deposit) self.assertEqual(len(deposit_requests), 1) - atom_entry_data = self.atom_entry_data2 % external_id.encode('utf-8') + atom_entry_data = b""" + + %s +""" % external_id.encode('utf-8') update_uri = response._headers['location'][1] # when updating the first deposit post response = self.client.post( update_uri, content_type='application/atom+xml;type=entry', data=atom_entry_data, HTTP_IN_PROGRESS='False') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = int(response_content['deposit_id']) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.client, self.user) self.assertEqual(len(Deposit.objects.all()), 1) # now 2 associated requests to a same deposit deposit_requests = DepositRequest.objects.filter( deposit=deposit).order_by('id') self.assertEqual(len(deposit_requests), 2) expected_meta = [ { 'metadata': parse_xml(self.atom_entry_data1), 'raw_metadata': self.atom_entry_data1.decode('utf-8'), }, { 'metadata': parse_xml(atom_entry_data), 'raw_metadata': atom_entry_data.decode('utf-8'), } ] for i, deposit_request in enumerate(deposit_requests): actual_metadata = deposit_request.metadata self.assertEqual(actual_metadata, expected_meta[i]['metadata']) self.assertEqual(deposit_request.raw_metadata, expected_meta[i]['raw_metadata']) self.assertFalse(bool(deposit_request.archive)) diff --git a/swh/deposit/tests/api/test_deposit_binary.py b/swh/deposit/tests/api/test_deposit_binary.py index f2549e7a..59aeb15c 100644 --- a/swh/deposit/tests/api/test_deposit_binary.py +++ b/swh/deposit/tests/api/test_deposit_binary.py @@ -1,645 +1,645 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.files.uploadedfile import InMemoryUploadedFile -from django.core.urlresolvers import reverse +from django.urls import reverse from io import BytesIO from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.tests import TEST_CONFIG from swh.deposit.config import COL_IRI, EM_IRI from swh.deposit.config import DEPOSIT_STATUS_DEPOSITED from swh.deposit.models import Deposit, DepositRequest from swh.deposit.parsers import parse_xml from ..common import ( BasicTestCase, WithAuthTestCase, create_arborescence_archive, FileSystemCreationRoutine ) class DepositTestCase(APITestCase, WithAuthTestCase, BasicTestCase, FileSystemCreationRoutine): """Try and upload one single deposit """ def setUp(self): super().setUp() self.atom_entry_data0 = b""" Awesome Compiler hal urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a %s 2017-10-07T15:17:08Z some awesome author something awesome-compiler This is an awesome compiler destined to awesomely compile stuff and other stuff compiler,programming,language 2005-10-07T17:17:08Z 2005-10-07T17:17:08Z release note related link Awesome https://hoster.org/awesome-compiler GNU/Linux 0.0.1 running all """ self.atom_entry_data1 = b""" hal urn:uuid:2225c695-cfb8-4ebb-aaaa-80da344efa6a 2017-10-07T15:17:08Z some awesome author something awesome-compiler This is an awesome compiler destined to awesomely compile stuff and other stuff compiler,programming,language 2005-10-07T17:17:08Z 2005-10-07T17:17:08Z release note related link Awesome https://hoster.org/awesome-compiler GNU/Linux 0.0.1 running all """ self.atom_entry_data2 = b""" %s """ self.atom_entry_data_empty_body = b""" """ self.atom_entry_data3 = b""" something """ self.data_atom_entry_ok = b""" Title urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2005-10-07T17:17:08Z Contributor The abstract The abstract Access Rights Alternative Title Date Available Bibliographic Citation # noqa Contributor Description Has Part Has Version Identifier Is Part Of Publisher References Rights Holder Source Title Type """ def test_post_deposit_binary_without_slug_header_is_bad_request(self): """Posting a binary deposit without slug header should return 400 """ url = reverse(COL_IRI, args=[self.collection.name]) # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') self.assertIn(b'Missing SLUG header', response.content) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) def test_post_deposit_binary_upload_final_and_status_check(self): """Binary upload with correct headers should return 201 with receipt """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive['name'], )) # then response_content = parse_xml(BytesIO(response.content)) self.assertEqual(response.status_code, status.HTTP_201_CREATED) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertEqual(deposit_request.deposit, deposit) self.assertRegex(deposit_request.archive.name, self.archive['name']) self.assertIsNone(deposit_request.metadata) self.assertIsNone(deposit_request.raw_metadata) response_content = parse_xml(BytesIO(response.content)) self.assertEqual(response_content['deposit_archive'], self.archive['name']) self.assertEqual(int(response_content['deposit_id']), deposit.id) self.assertEqual(response_content['deposit_status'], deposit.status) edit_se_iri = reverse('edit_se_iri', args=[self.collection.name, deposit.id]) self.assertEqual(response._headers['location'], ('Location', 'http://testserver' + edit_se_iri)) def test_post_deposit_binary_upload_supports_zip_or_tar(self): """Binary upload with content-type not in [zip,x-tar] should return 415 """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/octet-stream', data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_415_UNSUPPORTED_MEDIA_TYPE) with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_binary_fails_if_unsupported_packaging_header( self): """Bin deposit without supported content_disposition header returns 400 """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id' # when response = self.client.post( url, content_type='application/zip', data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='something-unsupported', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_binary_upload_fail_if_no_content_disposition_header( self): """Binary upload without content_disposition header should return 400 """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id' # when response = self.client.post( url, content_type='application/zip', data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false') # then self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_mediation_not_supported(self): """Binary upload with mediation should return a 412 response """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_ON_BEHALF_OF='someone', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_412_PRECONDITION_FAILED) with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_binary_upload_fail_if_upload_size_limit_exceeded( self): """Binary upload must not exceed the limit set up... """ # given url = reverse(COL_IRI, args=[self.collection.name]) archive = create_arborescence_archive( self.root_path, 'archive2', 'file2', b'some content in file', up_to_size=TEST_CONFIG['max_upload_size']) external_id = 'some-external-id' # when response = self.client.post( url, content_type='application/zip', data=archive['data'], # + headers CONTENT_LENGTH=archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_413_REQUEST_ENTITY_TOO_LARGE) self.assertRegex(response.content, b'Upload size limit exceeded') with self.assertRaises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_2_post_2_different_deposits(self): """2 posting deposits should return 2 different 201 with receipt """ url = reverse(COL_IRI, args=[self.collection.name]) # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG='some-external-id-1', HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) deposits = Deposit.objects.all() self.assertEqual(len(deposits), 1) self.assertEqual(deposits[0], deposit) # second post response = self.client.post( url, content_type='application/x-tar', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG='another-external-id', HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename1') self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id2 = response_content['deposit_id'] deposit2 = Deposit.objects.get(pk=deposit_id2) self.assertNotEqual(deposit, deposit2) deposits = Deposit.objects.all().order_by('id') self.assertEqual(len(deposits), 2) self.assertEqual(list(deposits), [deposit, deposit2]) def test_post_deposit_binary_and_post_to_add_another_archive(self): """Updating a deposit should return a 201 with receipt """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='true', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive['name'], )) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, 'partial') self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertEqual(deposit_request.deposit, deposit) - self.assertEqual(deposit_request.type.name, 'archive') + self.assertEqual(deposit_request.type, 'archive') self.assertRegex(deposit_request.archive.name, self.archive['name']) # 2nd archive to upload archive2 = create_arborescence_archive( self.root_path, 'archive2', 'file2', b'some other content in file') # uri to update the content update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) # adding another archive for the deposit and finalizing it response = self.client.post( update_uri, content_type='application/zip', # as zip data=archive2['data'], # + headers CONTENT_LENGTH=archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive2['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( archive2['name'])) self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_requests = list(DepositRequest.objects.filter(deposit=deposit). order_by('id')) # 2 deposit requests for the same deposit self.assertEqual(len(deposit_requests), 2) self.assertEqual(deposit_requests[0].deposit, deposit) - self.assertEqual(deposit_requests[0].type.name, 'archive') + self.assertEqual(deposit_requests[0].type, 'archive') self.assertRegex(deposit_requests[0].archive.name, self.archive['name']) self.assertEqual(deposit_requests[1].deposit, deposit) - self.assertEqual(deposit_requests[1].type.name, 'archive') + self.assertEqual(deposit_requests[1].type, 'archive') self.assertRegex(deposit_requests[1].archive.name, archive2['name']) # only 1 deposit in db deposits = Deposit.objects.all() self.assertEqual(len(deposits), 1) def test_post_deposit_then_post_or_put_is_refused_when_status_ready(self): """Updating a deposit with status 'ready' should return a 400 """ url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertEqual(deposit_request.deposit, deposit) self.assertRegex(deposit_request.archive.name, 'filename0') # updating/adding is forbidden # uri to update the content edit_se_iri = reverse( 'edit_se_iri', args=[self.collection.name, deposit_id]) em_iri = reverse( 'em_iri', args=[self.collection.name, deposit_id]) # Testing all update/add endpoint should fail # since the status is ready archive2 = create_arborescence_archive( self.root_path, 'archive2', 'file2', b'some content in file 2') # replacing file is no longer possible since the deposit's # status is ready r = self.client.put( em_iri, content_type='application/zip', data=archive2['data'], CONTENT_LENGTH=archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive2['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) # adding file is no longer possible since the deposit's status # is ready r = self.client.post( em_iri, content_type='application/zip', data=archive2['data'], CONTENT_LENGTH=archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive2['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) # replacing metadata is no longer possible since the deposit's # status is ready r = self.client.put( edit_se_iri, content_type='application/atom+xml;type=entry', data=self.data_atom_entry_ok, CONTENT_LENGTH=len(self.data_atom_entry_ok), HTTP_SLUG=external_id) self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) # adding new metadata is no longer possible since the # deposit's status is ready r = self.client.post( edit_se_iri, content_type='application/atom+xml;type=entry', data=self.data_atom_entry_ok, CONTENT_LENGTH=len(self.data_atom_entry_ok), HTTP_SLUG=external_id) self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) archive_content = b'some content representing archive' archive = InMemoryUploadedFile( BytesIO(archive_content), field_name='archive0', name='archive0', content_type='application/zip', size=len(archive_content), charset=None) atom_entry = InMemoryUploadedFile( BytesIO(self.data_atom_entry_ok), field_name='atom0', name='atom0', content_type='application/atom+xml; charset="utf-8"', size=len(self.data_atom_entry_ok), charset='utf-8') # replacing multipart metadata is no longer possible since the # deposit's status is ready r = self.client.put( edit_se_iri, format='multipart', data={ 'archive': archive, 'atom_entry': atom_entry, }) self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) # adding new metadata is no longer possible since the # deposit's status is ready r = self.client.post( edit_se_iri, format='multipart', data={ 'archive': archive, 'atom_entry': atom_entry, }) self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) diff --git a/swh/deposit/tests/api/test_deposit_check.py b/swh/deposit/tests/api/test_deposit_check.py index 1b9c5d2d..680cb034 100644 --- a/swh/deposit/tests/api/test_deposit_check.py +++ b/swh/deposit/tests/api/test_deposit_check.py @@ -1,236 +1,234 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest -from django.core.urlresolvers import reverse +from django.urls import reverse import pytest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.config import ( DEPOSIT_STATUS_VERIFIED, PRIVATE_CHECK_DEPOSIT, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED ) from swh.deposit.api.private.deposit_check import ( SWHChecksDeposit, MANDATORY_ARCHIVE_INVALID, - MANDATORY_FIELDS_MISSING, INCOMPATIBLE_URL_FIELDS, + MANDATORY_FIELDS_MISSING, MANDATORY_ARCHIVE_UNSUPPORTED, ALTERNATE_FIELDS_MISSING, MANDATORY_ARCHIVE_MISSING ) from swh.deposit.models import Deposit from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine @pytest.mark.fs class CheckDepositTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine): """Check deposit endpoints. """ def setUp(self): super().setUp() def test_deposit_ok(self): """Proper deposit should succeed the checks (-> status ready) """ deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit_id = self.update_binary_deposit(deposit_id, status_partial=False) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_VERIFIED) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_VERIFIED) def test_deposit_invalid_tarball(self): """Deposit with tarball (of 1 tarball) should fail the checks: rejected """ for archive_extension in ['zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz']: deposit_id = self.create_deposit_archive_with_archive( archive_extension) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(DEPOSIT_STATUS_DEPOSITED, deposit.status) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) details = data['details'] # archive checks failure self.assertEqual(len(details['archive']), 1) self.assertEqual(details['archive'][0]['summary'], MANDATORY_ARCHIVE_INVALID) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_REJECTED) def test_deposit_ko_missing_tarball(self): """Deposit without archive should fail the checks: rejected """ deposit_id = self.create_deposit_ready() # no archive, only atom deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(DEPOSIT_STATUS_DEPOSITED, deposit.status) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) details = data['details'] # archive checks failure self.assertEqual(len(details['archive']), 1) self.assertEqual(details['archive'][0]['summary'], MANDATORY_ARCHIVE_MISSING) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_REJECTED) def test_deposit_ko_unsupported_tarball(self): """Deposit with an unsupported tarball should fail the checks: rejected """ deposit_id = self.create_deposit_with_invalid_archive() deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(DEPOSIT_STATUS_DEPOSITED, deposit.status) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) details = data['details'] # archive checks failure self.assertEqual(len(details['archive']), 1) self.assertEqual(details['archive'][0]['summary'], MANDATORY_ARCHIVE_UNSUPPORTED) # metadata check failure self.assertEqual(len(details['metadata']), 2) mandatory = details['metadata'][0] self.assertEqual(mandatory['summary'], MANDATORY_FIELDS_MISSING) self.assertEqual(set(mandatory['fields']), - set(['url', 'external_identifier', 'author'])) + set(['author'])) alternate = details['metadata'][1] self.assertEqual(alternate['summary'], ALTERNATE_FIELDS_MISSING) self.assertEqual(alternate['fields'], ['name or title']) - # url check failure - self.assertEqual(details['url']['summary'], INCOMPATIBLE_URL_FIELDS) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_REJECTED) def test_check_deposit_metadata_ok(self): """Proper deposit should succeed the checks (-> status ready) with all **MUST** metadata using the codemeta metadata test set """ deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit_id_metadata = self.add_metadata_to_deposit(deposit_id) self.assertEqual(deposit_id, deposit_id_metadata) deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['status'], DEPOSIT_STATUS_VERIFIED) deposit = Deposit.objects.get(pk=deposit.id) self.assertEqual(deposit.status, DEPOSIT_STATUS_VERIFIED) class CheckMetadata(unittest.TestCase, SWHChecksDeposit): def test_check_metadata_ok(self): actual_check, detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'name': 'foo', 'author': 'someone', }) self.assertTrue(actual_check) self.assertIsNone(detail) def test_check_metadata_ok2(self): actual_check, detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'title': 'bar', 'author': 'someone', }) self.assertTrue(actual_check) self.assertIsNone(detail) def test_check_metadata_ko(self): """Missing optional field should be caught """ actual_check, error_detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'author': 'someone', }) expected_error = { 'metadata': [{ 'summary': 'Mandatory alternate fields are missing', 'fields': ['name or title'], }] } self.assertFalse(actual_check) self.assertEqual(error_detail, expected_error) def test_check_metadata_ko2(self): """Missing mandatory fields should be caught """ actual_check, error_detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'title': 'foobar', }) expected_error = { 'metadata': [{ 'summary': 'Mandatory fields are missing', 'fields': ['author'], }] } self.assertFalse(actual_check) self.assertEqual(error_detail, expected_error) diff --git a/swh/deposit/tests/api/test_deposit_delete.py b/swh/deposit/tests/api/test_deposit_delete.py index 9bf963cc..806d6325 100644 --- a/swh/deposit/tests/api/test_deposit_delete.py +++ b/swh/deposit/tests/api/test_deposit_delete.py @@ -1,113 +1,113 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from django.core.urlresolvers import reverse +from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.config import EDIT_SE_IRI, EM_IRI, ARCHIVE_KEY, METADATA_KEY from swh.deposit.config import DEPOSIT_STATUS_DEPOSITED from swh.deposit.models import Deposit, DepositRequest from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine class DepositDeleteTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): def test_delete_archive_on_partial_deposit_works(self): """Removing partial deposit's archive should return a 204 response """ # given deposit_id = self.create_deposit_partial() deposit = Deposit.objects.get(pk=deposit_id) deposit_requests = DepositRequest.objects.filter(deposit=deposit) self.assertEqual(len(deposit_requests), 2) for dr in deposit_requests: - if dr.type.name == ARCHIVE_KEY: + if dr.type == ARCHIVE_KEY: continue - elif dr.type.name == METADATA_KEY: + elif dr.type == METADATA_KEY: continue else: self.fail('only archive and metadata type should exist ' 'in this test context') # when update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) response = self.client.delete(update_uri) # then self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) deposit = Deposit.objects.get(pk=deposit_id) requests = list(DepositRequest.objects.filter(deposit=deposit)) self.assertEqual(len(requests), 2) - self.assertEqual(requests[0].type.name, 'metadata') - self.assertEqual(requests[1].type.name, 'metadata') + self.assertEqual(requests[0].type, 'metadata') + self.assertEqual(requests[1].type, 'metadata') def test_delete_archive_on_undefined_deposit_fails(self): """Delete undefined deposit returns a 404 response """ # when update_uri = reverse(EM_IRI, args=[self.collection.name, 999]) response = self.client.delete(update_uri) # then self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) def test_delete_archive_on_non_partial_deposit_fails(self): """Delete !partial status deposit should return a 400 response""" deposit_id = self.create_deposit_ready() deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) # when update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) response = self.client.delete(update_uri) # then self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) deposit = Deposit.objects.get(pk=deposit_id) self.assertIsNotNone(deposit) def test_delete_partial_deposit_works(self): """Delete deposit should return a 204 response """ # given deposit_id = self.create_simple_deposit_partial() deposit = Deposit.objects.get(pk=deposit_id) assert deposit.id == deposit_id # when url = reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]) response = self.client.delete(url) # then self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) deposit_requests = list(DepositRequest.objects.filter(deposit=deposit)) self.assertEqual(deposit_requests, []) deposits = list(Deposit.objects.filter(pk=deposit_id)) self.assertEqual(deposits, []) def test_delete_on_edit_se_iri_cannot_delete_non_partial_deposit(self): """Delete !partial deposit should return a 400 response """ # given deposit_id = self.create_deposit_ready() deposit = Deposit.objects.get(pk=deposit_id) assert deposit.id == deposit_id # when url = reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]) response = self.client.delete(url) # then self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) deposit = Deposit.objects.get(pk=deposit_id) self.assertIsNotNone(deposit) diff --git a/swh/deposit/tests/api/test_deposit_list.py b/swh/deposit/tests/api/test_deposit_list.py index 6fb84349..0b21fbdc 100644 --- a/swh/deposit/tests/api/test_deposit_list.py +++ b/swh/deposit/tests/api/test_deposit_list.py @@ -1,94 +1,94 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from django.core.urlresolvers import reverse +from django.urls import reverse import pytest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.api.converters import convert_status_detail from ...config import DEPOSIT_STATUS_PARTIAL, PRIVATE_LIST_DEPOSITS from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ...models import Deposit @pytest.mark.fs class CheckDepositListTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): """Check deposit list endpoints. """ def setUp(self): super().setUp() def test_deposit_list(self): """Deposit list api should return the deposits """ deposit_id = self.create_deposit_partial() # amend the deposit with a status_detail deposit = Deposit.objects.get(pk=deposit_id) status_detail = { 'url': { 'summary': 'At least one compatible url field. Failed', 'fields': ['testurl'], }, 'metadata': [ { 'summary': 'Mandatory fields missing', 'fields': ['9', 10, 1.212], }, ], 'archive': [ { 'summary': 'Invalid archive', 'fields': ['3'], }, { 'summary': 'Unsupported archive', 'fields': [2], } ], } deposit.status_detail = status_detail deposit.save() deposit_id2 = self.create_deposit_partial() # NOTE: does not work as documented # https://docs.djangoproject.com/en/1.11/ref/urlresolvers/#django.core.urlresolvers.reverse # noqa # url = reverse(PRIVATE_LIST_DEPOSITS, kwargs={'page_size': 1}) main_url = reverse(PRIVATE_LIST_DEPOSITS) url = '%s?page_size=1' % main_url response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() self.assertEqual(data['count'], 2) # 2 deposits expected_next = '%s?page=2&page_size=1' % main_url self.assertTrue(data['next'].endswith(expected_next)) self.assertIsNone(data['previous']) self.assertEqual(len(data['results']), 1) # page of size 1 deposit = data['results'][0] self.assertEqual(deposit['id'], deposit_id) self.assertEqual(deposit['status'], DEPOSIT_STATUS_PARTIAL) expected_status_detail = convert_status_detail(status_detail) self.assertEqual(deposit['status_detail'], expected_status_detail) # then 2nd page response2 = self.client.get(expected_next) self.assertEqual(response2.status_code, status.HTTP_200_OK) data2 = response2.json() self.assertEqual(data2['count'], 2) # still 2 deposits self.assertIsNone(data2['next']) expected_previous = '%s?page_size=1' % main_url self.assertTrue(data2['previous'].endswith(expected_previous)) self.assertEqual(len(data2['results']), 1) # page of size 1 deposit2 = data2['results'][0] self.assertEqual(deposit2['id'], deposit_id2) self.assertEqual(deposit2['status'], DEPOSIT_STATUS_PARTIAL) diff --git a/swh/deposit/tests/api/test_deposit_multipart.py b/swh/deposit/tests/api/test_deposit_multipart.py index 4bd37b6a..05a03832 100644 --- a/swh/deposit/tests/api/test_deposit_multipart.py +++ b/swh/deposit/tests/api/test_deposit_multipart.py @@ -1,402 +1,448 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.files.uploadedfile import InMemoryUploadedFile -from django.core.urlresolvers import reverse +from django.urls import reverse from io import BytesIO from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.config import COL_IRI from swh.deposit.config import DEPOSIT_STATUS_DEPOSITED from swh.deposit.models import Deposit, DepositRequest from swh.deposit.parsers import parse_xml from ..common import BasicTestCase, WithAuthTestCase from ..common import FileSystemCreationRoutine class DepositMultipartTestCase(APITestCase, WithAuthTestCase, BasicTestCase, FileSystemCreationRoutine): """Post multipart deposit scenario """ def setUp(self): super().setUp() self.data_atom_entry_ok = b""" Title urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2005-10-07T17:17:08Z Contributor The abstract The abstract Access Rights Alternative Title Date Available Bibliographic Citation # noqa Contributor Description Has Part Has Version Identifier Is Part Of Publisher References Rights Holder Source Title Type """ self.data_atom_entry_update_in_place = """ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa7b Title Type """ def test_post_deposit_multipart_without_slug_header_is_bad_request(self): # given url = reverse(COL_IRI, args=[self.collection.name]) data_atom_entry = self.data_atom_entry_ok archive_content = b'some content representing archive' archive = InMemoryUploadedFile( BytesIO(archive_content), field_name='archive0', name='archive0', content_type='application/zip', size=len(archive_content), charset=None) atom_entry = InMemoryUploadedFile( BytesIO(data_atom_entry), field_name='atom0', name='atom0', content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry), charset='utf-8') # when response = self.client.post( url, format='multipart', data={ 'archive': archive, 'atom_entry': atom_entry, }, # + headers HTTP_IN_PROGRESS='false') self.assertIn(b'Missing SLUG header', response.content) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) def test_post_deposit_multipart_zip(self): """one multipart deposit (zip+xml) should be accepted """ # given url = reverse(COL_IRI, args=[self.collection.name]) # from django.core.files import uploadedfile data_atom_entry = self.data_atom_entry_ok archive = InMemoryUploadedFile( BytesIO(self.archive['data']), field_name=self.archive['name'], name=self.archive['name'], content_type='application/zip', size=self.archive['length'], charset=None) atom_entry = InMemoryUploadedFile( BytesIO(data_atom_entry), field_name='atom0', name='atom0', content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry), charset='utf-8') external_id = 'external-id' # when response = self.client.post( url, format='multipart', data={ 'archive': archive, 'atom_entry': atom_entry, }, # + headers HTTP_IN_PROGRESS='false', HTTP_SLUG=external_id) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_requests = DepositRequest.objects.filter(deposit=deposit) self.assertEqual(len(deposit_requests), 2) for deposit_request in deposit_requests: self.assertEqual(deposit_request.deposit, deposit) - if deposit_request.type.name == 'archive': + if deposit_request.type == 'archive': self.assertRegex(deposit_request.archive.name, self.archive['name']) self.assertIsNone(deposit_request.metadata) self.assertIsNone(deposit_request.raw_metadata) else: self.assertEqual( deposit_request.metadata['id'], 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a') self.assertEqual(deposit_request.raw_metadata, data_atom_entry.decode('utf-8')) def test_post_deposit_multipart_tar(self): """one multipart deposit (tar+xml) should be accepted """ # given url = reverse(COL_IRI, args=[self.collection.name]) # from django.core.files import uploadedfile data_atom_entry = self.data_atom_entry_ok archive = InMemoryUploadedFile( BytesIO(self.archive['data']), field_name=self.archive['name'], name=self.archive['name'], content_type='application/x-tar', size=self.archive['length'], charset=None) atom_entry = InMemoryUploadedFile( BytesIO(data_atom_entry), field_name='atom0', name='atom0', content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry), charset='utf-8') external_id = 'external-id' # when response = self.client.post( url, format='multipart', data={ 'archive': archive, 'atom_entry': atom_entry, }, # + headers HTTP_IN_PROGRESS='false', HTTP_SLUG=external_id) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_requests = DepositRequest.objects.filter(deposit=deposit) self.assertEqual(len(deposit_requests), 2) for deposit_request in deposit_requests: self.assertEqual(deposit_request.deposit, deposit) - if deposit_request.type.name == 'archive': + if deposit_request.type == 'archive': self.assertRegex(deposit_request.archive.name, self.archive['name']) self.assertIsNone(deposit_request.metadata) self.assertIsNone(deposit_request.raw_metadata) else: self.assertEqual( deposit_request.metadata['id'], 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a') self.assertEqual(deposit_request.raw_metadata, data_atom_entry.decode('utf-8')) def test_post_deposit_multipart_put_to_replace_metadata(self): """One multipart deposit followed by a metadata update should be accepted """ # given url = reverse(COL_IRI, args=[self.collection.name]) data_atom_entry = self.data_atom_entry_ok archive = InMemoryUploadedFile( BytesIO(self.archive['data']), field_name=self.archive['name'], name=self.archive['name'], content_type='application/zip', size=self.archive['length'], charset=None) atom_entry = InMemoryUploadedFile( BytesIO(data_atom_entry), field_name='atom0', name='atom0', content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry), charset='utf-8') external_id = 'external-id' # when response = self.client.post( url, format='multipart', data={ 'archive': archive, 'atom_entry': atom_entry, }, # + headers HTTP_IN_PROGRESS='true', HTTP_SLUG=external_id) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content['deposit_id'] deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, 'partial') self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_requests = DepositRequest.objects.filter(deposit=deposit) self.assertEqual(len(deposit_requests), 2) for deposit_request in deposit_requests: self.assertEqual(deposit_request.deposit, deposit) - if deposit_request.type.name == 'archive': + if deposit_request.type == 'archive': self.assertRegex(deposit_request.archive.name, self.archive['name']) else: self.assertEqual( deposit_request.metadata['id'], 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a') self.assertEqual(deposit_request.raw_metadata, data_atom_entry.decode('utf-8')) replace_metadata_uri = response._headers['location'][1] response = self.client.put( replace_metadata_uri, content_type='application/atom+xml;type=entry', data=self.data_atom_entry_update_in_place, HTTP_IN_PROGRESS='false') self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) # deposit_id did not change deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_DEPOSITED) self.assertEqual(deposit.external_id, external_id) self.assertEqual(deposit.collection, self.collection) self.assertEqual(deposit.client, self.user) self.assertIsNone(deposit.swh_id) deposit_requests = DepositRequest.objects.filter(deposit=deposit) self.assertEqual(len(deposit_requests), 2) for deposit_request in deposit_requests: self.assertEqual(deposit_request.deposit, deposit) - if deposit_request.type.name == 'archive': + if deposit_request.type == 'archive': self.assertRegex(deposit_request.archive.name, self.archive['name']) else: self.assertEqual( deposit_request.metadata['id'], 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa7b') self.assertEqual( deposit_request.raw_metadata, self.data_atom_entry_update_in_place) # FAILURE scenarios def test_post_deposit_multipart_only_archive_and_atom_entry(self): """Multipart deposit only accepts one archive and one atom+xml""" # given url = reverse(COL_IRI, args=[self.collection.name]) archive_content = b'some content representing archive' archive = InMemoryUploadedFile(BytesIO(archive_content), field_name='archive0', name='archive0', content_type='application/x-tar', size=len(archive_content), charset=None) other_archive_content = b"some-other-content" other_archive = InMemoryUploadedFile(BytesIO(other_archive_content), field_name='atom0', name='atom0', content_type='application/x-tar', size=len(other_archive_content), charset='utf-8') # when response = self.client.post( url, format='multipart', data={ 'archive': archive, 'atom_entry': other_archive, }, # + headers HTTP_IN_PROGRESS='false', HTTP_SLUG='external-id') # then self.assertEqual(response.status_code, status.HTTP_415_UNSUPPORTED_MEDIA_TYPE) self.assertTrue( 'Only 1 application/zip (or application/x-tar) archive' in response.content.decode('utf-8')) # when archive.seek(0) response = self.client.post( url, format='multipart', data={ 'archive': archive, }, # + headers HTTP_IN_PROGRESS='false', HTTP_SLUG='external-id') # then self.assertEqual(response.status_code, status.HTTP_415_UNSUPPORTED_MEDIA_TYPE) self.assertTrue( 'You must provide both 1 application/zip (or ' 'application/x-tar) and 1 atom+xml entry for ' 'multipart deposit' in response.content.decode('utf-8') ) + + def test_post_deposit_multipart_400_when_badly_formatted_xml(self): + # given + url = reverse(COL_IRI, args=[self.collection.name]) + + data_atom_entry_ko = b""" + + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + +""" + + archive_content = b'some content representing archive' + archive = InMemoryUploadedFile( + BytesIO(archive_content), + field_name='archive0', + name='archive0', + content_type='application/zip', + size=len(archive_content), + charset=None) + + atom_entry = InMemoryUploadedFile( + BytesIO(data_atom_entry_ko), + field_name='atom0', + name='atom0', + content_type='application/atom+xml; charset="utf-8"', + size=len(data_atom_entry_ko), + charset='utf-8') + + # when + response = self.client.post( + url, + format='multipart', + data={ + 'archive': archive, + 'atom_entry': atom_entry, + }, + # + headers + HTTP_IN_PROGRESS='false', + HTTP_SLUG='external-id', + ) + + self.assertIn(b'Malformed xml metadata', response.content) + self.assertEqual(response.status_code, + status.HTTP_400_BAD_REQUEST) diff --git a/swh/deposit/tests/api/test_deposit_read_archive.py b/swh/deposit/tests/api/test_deposit_read_archive.py index 4c82ad2b..07d16c3f 100644 --- a/swh/deposit/tests/api/test_deposit_read_archive.py +++ b/swh/deposit/tests/api/test_deposit_read_archive.py @@ -1,125 +1,125 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib import os -from django.core.urlresolvers import reverse +from django.urls import reverse import pytest from rest_framework import status from rest_framework.test import APITestCase from swh.core import tarball from swh.deposit.config import PRIVATE_GET_RAW_CONTENT from swh.deposit.tests import TEST_CONFIG from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine, create_arborescence_archive @pytest.mark.fs class DepositReadArchivesTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine): def setUp(self): super().setUp() self.archive2 = create_arborescence_archive( self.root_path, 'archive2', 'file2', b'some other content in file') self.workdir = os.path.join(self.root_path, 'workdir') def test_access_to_existing_deposit_with_one_archive(self): """Access to deposit should stream a 200 response with its raw content """ deposit_id = self.create_simple_binary_deposit() url = reverse(PRIVATE_GET_RAW_CONTENT, args=[self.collection.name, deposit_id]) r = self.client.get(url) self.assertEqual(r.status_code, status.HTTP_200_OK) self.assertEqual(r._headers['content-type'][1], 'application/octet-stream') # read the stream data = b''.join(r.streaming_content) actual_sha1 = hashlib.sha1(data).hexdigest() self.assertEqual(actual_sha1, self.archive['sha1sum']) # this does not touch the extraction dir so this should stay empty self.assertEqual(os.listdir(TEST_CONFIG['extraction_dir']), []) def _check_tarball_consistency(self, actual_sha1): tarball.uncompress(self.archive['path'], self.workdir) self.assertEqual(os.listdir(self.workdir), ['file1']) tarball.uncompress(self.archive2['path'], self.workdir) lst = set(os.listdir(self.workdir)) self.assertEqual(lst, {'file1', 'file2'}) new_path = self.workdir + '.zip' tarball.compress(new_path, 'zip', self.workdir) with open(new_path, 'rb') as f: h = hashlib.sha1(f.read()).hexdigest() self.assertEqual(actual_sha1, h) self.assertNotEqual(actual_sha1, self.archive['sha1sum']) self.assertNotEqual(actual_sha1, self.archive2['sha1sum']) def test_access_to_existing_deposit_with_multiple_archives(self): """Access to deposit should stream a 200 response with its raw contents """ deposit_id = self.create_complex_binary_deposit() url = reverse(PRIVATE_GET_RAW_CONTENT, args=[self.collection.name, deposit_id]) r = self.client.get(url) self.assertEqual(r.status_code, status.HTTP_200_OK) self.assertEqual(r._headers['content-type'][1], 'application/octet-stream') # read the stream data = b''.join(r.streaming_content) actual_sha1 = hashlib.sha1(data).hexdigest() self._check_tarball_consistency(actual_sha1) # this touches the extraction directory but should clean up # after itself self.assertEqual(os.listdir(TEST_CONFIG['extraction_dir']), []) class DepositReadArchivesFailureTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): def test_access_to_nonexisting_deposit_returns_404_response(self): """Read unknown collection should return a 404 response """ unknown_id = '999' url = reverse(PRIVATE_GET_RAW_CONTENT, args=[self.collection.name, unknown_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertIn('Deposit with id %s does not exist' % unknown_id, response.content.decode('utf-8')) def test_access_to_nonexisting_collection_returns_404_response(self): """Read unknown deposit should return a 404 response """ collection_name = 'non-existing' deposit_id = self.create_deposit_partial() url = reverse(PRIVATE_GET_RAW_CONTENT, args=[collection_name, deposit_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertIn('Unknown collection name %s' % collection_name, response.content.decode('utf-8')) diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py index 9f40e701..0e7e38aa 100644 --- a/swh/deposit/tests/api/test_deposit_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_read_metadata.py @@ -1,205 +1,661 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from django.core.urlresolvers import reverse +from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.models import Deposit from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS from swh.deposit.config import DEPOSIT_STATUS_PARTIAL from ...config import SWH_PERSON from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine class DepositReadMetadataTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): """Deposit access to read metadata information on deposit. """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.template_metadata = """ + + Composing a Web of Audio Applications + hal + hal-01243065 + hal-01243065 + https://hal-test.archives-ouvertes.fr/hal-01243065 + test + DSP programming + this is the description + 1 + phpstorm + stable + php + python + C + + GNU General Public License v3.0 only + + + CeCILL Free Software License Agreement v1.1 + + + HAL + hal@ccsd.cnrs.fr + + + Morane Gruenpeter + +%s +""" + def test_read_metadata(self): """Private metadata read api to existing deposit should return metadata """ deposit_id = self.create_deposit_partial() url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[self.collection.name, deposit_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response._headers['content-type'][1], 'application/json') data = response.json() expected_meta = { 'origin': { 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id', 'type': 'deposit' }, 'origin_metadata': { 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], + 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'provider': { 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'metadata': {} }, 'tool': { 'name': 'swh-deposit', 'version': '0.0.1', 'configuration': { 'sword_version': '2' } } }, 'revision': { 'synthetic': True, - 'committer_date': None, + 'committer_date': { + 'timestamp': { + 'seconds': 1507389428, + 'microseconds': 0 + }, + 'offset': 0, + 'negative_utc': False + }, 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'author': SWH_PERSON, 'committer': SWH_PERSON, - 'date': None, + 'date': { + 'timestamp': { + 'seconds': 1507389428, + 'microseconds': 0 + }, + 'offset': 0, + 'negative_utc': False + }, 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], 'external_identifier': 'some-external-id', + 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'type': 'tar' }, 'branch_name': 'master', } self.assertEqual(data, expected_meta) def test_read_metadata_revision_with_parent(self): """Private read metadata to a deposit (with parent) returns metadata """ swh_id = 'da78a9d4cf1d5d29873693fd496142e3a18c20fa' swh_persistent_id = 'swh:1:rev:%s' % swh_id deposit_id1 = self.create_deposit_with_status( status=DEPOSIT_STATUS_LOAD_SUCCESS, external_id='some-external-id', swh_id=swh_persistent_id) deposit_parent = Deposit.objects.get(pk=deposit_id1) self.assertEqual(deposit_parent.swh_id, swh_persistent_id) self.assertEqual(deposit_parent.external_id, 'some-external-id') self.assertEqual(deposit_parent.status, DEPOSIT_STATUS_LOAD_SUCCESS) deposit_id = self.create_deposit_partial( external_id='some-external-id') deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.external_id, 'some-external-id') self.assertEqual(deposit.swh_id, None) self.assertEqual(deposit.parent, deposit_parent) self.assertEqual(deposit.status, DEPOSIT_STATUS_PARTIAL) url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[self.collection.name, deposit_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response._headers['content-type'][1], 'application/json') data = response.json() expected_meta = { 'origin': { 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id', 'type': 'deposit' }, 'origin_metadata': { 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], + 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'provider': { 'provider_name': 'hal', 'provider_type': 'deposit_client', 'provider_url': 'https://hal-test.archives-ouvertes.fr/', 'metadata': {} }, 'tool': { 'name': 'swh-deposit', 'version': '0.0.1', 'configuration': { 'sword_version': '2' } } }, 'revision': { 'synthetic': True, - 'date': None, - 'committer_date': None, + 'date': { + 'timestamp': { + 'seconds': 1507389428, + 'microseconds': 0 + }, + 'offset': 0, + 'negative_utc': False + }, + 'committer_date': { + 'timestamp': { + 'seconds': 1507389428, + 'microseconds': 0 + }, + 'offset': 0, + 'negative_utc': False + }, 'author': SWH_PERSON, 'committer': SWH_PERSON, 'type': 'tar', 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], + 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, 'parents': [swh_id] }, 'branch_name': 'master', } self.assertEqual(data, expected_meta) + def test_read_metadata_3(self): + """date(Created|Published) provided, uses author/committer date + + """ + # add metadata to the deposit with datePublished and dateCreated + codemeta_entry_data = self.template_metadata % """ + 2015-04-06T17:08:47+02:00 + 2017-05-03T16:08:47+02:00 +""" + + deposit_id = self.create_deposit_partial_with_data_in_args( + codemeta_entry_data) + + url = reverse(PRIVATE_GET_DEPOSIT_METADATA, + args=[self.collection.name, deposit_id]) + + response = self.client.get(url) + + self.assertEqual(response.status_code, + status.HTTP_200_OK) + self.assertEqual(response._headers['content-type'][1], + 'application/json') + data = response.json() + + expected_origin = { + 'type': 'deposit', + 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065' + } + expected_metadata = { + '@xmlns': 'http://www.w3.org/2005/Atom', + '@xmlns:codemeta': + 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', + 'author': { + 'email': 'hal@ccsd.cnrs.fr', + 'name': 'HAL' + }, + 'client': 'hal', + 'codemeta:applicationCategory': 'test', + 'codemeta:author': { + 'codemeta:name': 'Morane Gruenpeter' + }, + 'codemeta:dateCreated': '2015-04-06T17:08:47+02:00', + 'codemeta:datePublished': '2017-05-03T16:08:47+02:00', + 'codemeta:description': 'this is the description', + 'codemeta:developmentStatus': 'stable', + 'codemeta:keywords': 'DSP programming', + 'codemeta:license': [ + { + 'codemeta:name': 'GNU General Public License v3.0 only' + }, + { + 'codemeta:name': + 'CeCILL Free Software License Agreement v1.1' + } + ], + 'codemeta:programmingLanguage': [ + 'php', 'python', 'C' + ], + 'codemeta:runtimePlatform': 'phpstorm', + 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa + 'codemeta:version': '1', + 'external_identifier': 'hal-01243065', + 'id': 'hal-01243065', + 'title': 'Composing a Web of Audio Applications' + } + + expected_origin_metadata = { + 'metadata': expected_metadata, + 'provider': { + 'metadata': {}, + 'provider_name': 'hal', + 'provider_type': 'deposit_client', + 'provider_url': 'https://hal-test.archives-ouvertes.fr/' + }, + 'tool': { + 'configuration': { + 'sword_version': '2' + }, + 'name': 'swh-deposit', + 'version': '0.0.1' + } + } + + expected_revision = { + 'author': { + 'email': 'robot@softwareheritage.org', + 'fullname': 'Software Heritage', + 'name': 'Software Heritage' + }, + 'committer': { + 'email': 'robot@softwareheritage.org', + 'fullname': 'Software Heritage', + 'name': 'Software Heritage' + }, + 'committer_date': { + 'negative_utc': False, + 'offset': 120, + 'timestamp': { + 'microseconds': 0, + 'seconds': 1493820527 + } + }, + 'date': { + 'negative_utc': False, + 'offset': 120, + 'timestamp': { + 'microseconds': 0, + 'seconds': 1428332927 + } + }, + 'message': 'hal: Deposit %s in collection hal' % deposit_id, + 'metadata': expected_metadata, + 'synthetic': True, + 'type': 'tar' + } + + expected_meta = { + 'branch_name': 'master', + 'origin': expected_origin, + 'origin_metadata': expected_origin_metadata, + 'revision': expected_revision, + } + + self.assertEqual(data, expected_meta) + + def test_read_metadata_4(self): + """dateCreated/datePublished not provided, revision uses complete_date + + """ + codemeta_entry_data = self.template_metadata % '' + + deposit_id = self.create_deposit_partial_with_data_in_args( + codemeta_entry_data) + + # will use the deposit completed date as fallback date + deposit = Deposit.objects.get(pk=deposit_id) + deposit.complete_date = '2016-04-06' + deposit.save() + + url = reverse(PRIVATE_GET_DEPOSIT_METADATA, + args=[self.collection.name, deposit_id]) + + response = self.client.get(url) + + self.assertEqual(response.status_code, + status.HTTP_200_OK) + self.assertEqual(response._headers['content-type'][1], + 'application/json') + data = response.json() + + expected_origin = { + 'type': 'deposit', + 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065' + } + expected_metadata = { + '@xmlns': 'http://www.w3.org/2005/Atom', + '@xmlns:codemeta': + 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', + 'author': { + 'email': 'hal@ccsd.cnrs.fr', + 'name': 'HAL' + }, + 'client': 'hal', + 'codemeta:applicationCategory': 'test', + 'codemeta:author': { + 'codemeta:name': 'Morane Gruenpeter' + }, + 'codemeta:description': 'this is the description', + 'codemeta:developmentStatus': 'stable', + 'codemeta:keywords': 'DSP programming', + 'codemeta:license': [ + { + 'codemeta:name': 'GNU General Public License v3.0 only' + }, + { + 'codemeta:name': + 'CeCILL Free Software License Agreement v1.1' + } + ], + 'codemeta:programmingLanguage': [ + 'php', 'python', 'C' + ], + 'codemeta:runtimePlatform': 'phpstorm', + 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa + 'codemeta:version': '1', + 'external_identifier': 'hal-01243065', + 'id': 'hal-01243065', + 'title': 'Composing a Web of Audio Applications' + } + + expected_origin_metadata = { + 'metadata': expected_metadata, + 'provider': { + 'metadata': {}, + 'provider_name': 'hal', + 'provider_type': 'deposit_client', + 'provider_url': 'https://hal-test.archives-ouvertes.fr/' + }, + 'tool': { + 'configuration': { + 'sword_version': '2' + }, + 'name': 'swh-deposit', + 'version': '0.0.1' + } + } + + expected_revision = { + 'author': { + 'email': 'robot@softwareheritage.org', + 'fullname': 'Software Heritage', + 'name': 'Software Heritage' + }, + 'committer': { + 'email': 'robot@softwareheritage.org', + 'fullname': 'Software Heritage', + 'name': 'Software Heritage' + }, + 'committer_date': { + 'negative_utc': False, + 'offset': 0, + 'timestamp': { + 'microseconds': 0, + 'seconds': 1459900800 + } + }, + 'date': { + 'negative_utc': False, + 'offset': 0, + 'timestamp': { + 'microseconds': 0, + 'seconds': 1459900800 + } + }, + 'message': 'hal: Deposit %s in collection hal' % deposit_id, + 'metadata': expected_metadata, + 'synthetic': True, + 'type': 'tar' + } + + expected_meta = { + 'branch_name': 'master', + 'origin': expected_origin, + 'origin_metadata': expected_origin_metadata, + 'revision': expected_revision, + } + + self.assertEqual(data, expected_meta) + + def test_read_metadata_5(self): + """dateCreated/datePublished provided, revision uses author/committer + date + + If multiple dateCreated provided, the first occurrence (of + dateCreated) is selected. If multiple datePublished provided, + the first occurrence (of datePublished) is selected. + + """ + # add metadata to the deposit with multiple datePublished/dateCreated + codemeta_entry_data = self.template_metadata % """ + 2015-04-06T17:08:47+02:00 + 2017-05-03T16:08:47+02:00 + 2016-04-06T17:08:47+02:00 + 2018-05-03T16:08:47+02:00 +""" + + deposit_id = self.create_deposit_partial_with_data_in_args( + codemeta_entry_data) + + url = reverse(PRIVATE_GET_DEPOSIT_METADATA, + args=[self.collection.name, deposit_id]) + + response = self.client.get(url) + + self.assertEqual(response.status_code, + status.HTTP_200_OK) + self.assertEqual(response._headers['content-type'][1], + 'application/json') + data = response.json() + + expected_origin = { + 'type': 'deposit', + 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065' + } + expected_metadata = { + '@xmlns': 'http://www.w3.org/2005/Atom', + '@xmlns:codemeta': + 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', + 'author': { + 'email': 'hal@ccsd.cnrs.fr', + 'name': 'HAL' + }, + 'client': 'hal', + 'codemeta:applicationCategory': 'test', + 'codemeta:author': { + 'codemeta:name': 'Morane Gruenpeter' + }, + 'codemeta:dateCreated': [ + '2015-04-06T17:08:47+02:00', + '2016-04-06T17:08:47+02:00', + ], + 'codemeta:datePublished': [ + '2017-05-03T16:08:47+02:00', + '2018-05-03T16:08:47+02:00', + ], + 'codemeta:description': 'this is the description', + 'codemeta:developmentStatus': 'stable', + 'codemeta:keywords': 'DSP programming', + 'codemeta:license': [ + { + 'codemeta:name': 'GNU General Public License v3.0 only' + }, + { + 'codemeta:name': + 'CeCILL Free Software License Agreement v1.1' + } + ], + 'codemeta:programmingLanguage': [ + 'php', 'python', 'C' + ], + 'codemeta:runtimePlatform': 'phpstorm', + 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa + 'codemeta:version': '1', + 'external_identifier': 'hal-01243065', + 'id': 'hal-01243065', + 'title': 'Composing a Web of Audio Applications' + } + + expected_origin_metadata = { + 'metadata': expected_metadata, + 'provider': { + 'metadata': {}, + 'provider_name': 'hal', + 'provider_type': 'deposit_client', + 'provider_url': 'https://hal-test.archives-ouvertes.fr/' + }, + 'tool': { + 'configuration': { + 'sword_version': '2' + }, + 'name': 'swh-deposit', + 'version': '0.0.1' + } + } + + expected_revision = { + 'author': { + 'email': 'robot@softwareheritage.org', + 'fullname': 'Software Heritage', + 'name': 'Software Heritage' + }, + 'committer': { + 'email': 'robot@softwareheritage.org', + 'fullname': 'Software Heritage', + 'name': 'Software Heritage' + }, + 'committer_date': { + 'negative_utc': False, + 'offset': 120, + 'timestamp': { + 'microseconds': 0, + 'seconds': 1493820527 + } + }, + 'date': { + 'negative_utc': False, + 'offset': 120, + 'timestamp': { + 'microseconds': 0, + 'seconds': 1428332927 + } + }, + 'message': 'hal: Deposit %s in collection hal' % deposit_id, + 'metadata': expected_metadata, + 'synthetic': True, + 'type': 'tar' + } + + expected_meta = { + 'branch_name': 'master', + 'origin': expected_origin, + 'origin_metadata': expected_origin_metadata, + 'revision': expected_revision, + } + + self.assertEqual(data, expected_meta) + def test_access_to_nonexisting_deposit_returns_404_response(self): """Read unknown collection should return a 404 response """ unknown_id = '999' url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[self.collection.name, unknown_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertIn('Deposit with id %s does not exist' % unknown_id, response.content.decode('utf-8')) def test_access_to_nonexisting_collection_returns_404_response(self): """Read unknown deposit should return a 404 response """ collection_name = 'non-existing' deposit_id = self.create_deposit_partial() url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[collection_name, deposit_id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertIn('Unknown collection name %s' % collection_name, response.content.decode('utf-8'),) diff --git a/swh/deposit/tests/api/test_deposit_status.py b/swh/deposit/tests/api/test_deposit_status.py index 256f2cf6..0d1284cd 100644 --- a/swh/deposit/tests/api/test_deposit_status.py +++ b/swh/deposit/tests/api/test_deposit_status.py @@ -1,144 +1,145 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from django.core.urlresolvers import reverse +from django.urls import reverse from io import BytesIO from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.config import (COL_IRI, STATE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED) from swh.deposit.models import Deposit, DEPOSIT_STATUS_DETAIL from swh.deposit.models import DEPOSIT_STATUS_LOAD_SUCCESS from swh.deposit.parsers import parse_xml from ..common import BasicTestCase, WithAuthTestCase, FileSystemCreationRoutine from ..common import CommonCreationRoutine class DepositStatusTestCase(APITestCase, WithAuthTestCase, BasicTestCase, FileSystemCreationRoutine, CommonCreationRoutine): """Status on deposit """ def test_post_deposit_with_status_check(self): """Binary upload should be accepted """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) deposit = Deposit.objects.get(external_id=external_id) status_url = reverse(STATE_IRI, args=[self.collection.name, deposit.id]) # check status status_response = self.client.get(status_url) self.assertEqual(status_response.status_code, status.HTTP_200_OK) r = parse_xml(BytesIO(status_response.content)) self.assertEqual(int(r['deposit_id']), deposit.id) self.assertEqual(r['deposit_status'], DEPOSIT_STATUS_DEPOSITED) self.assertEqual(r['deposit_status_detail'], DEPOSIT_STATUS_DETAIL[DEPOSIT_STATUS_DEPOSITED]) + self.assertEqual(r['deposit_external_id'], external_id) def test_status_with_swh_information(self): _status = DEPOSIT_STATUS_LOAD_SUCCESS _context = 'https://hal.archives-ouvertes.fr/hal-01727745' _swh_id = 'swh:1:dir:42a13fc721c8716ff695d0d62fc851d641f3a12b' _swh_id_context = '%s;%s' % (_swh_id, _context) _swh_anchor_id = 'swh:rev:1:548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10' _swh_anchor_id_context = '%s;%s' % (_swh_anchor_id, _context) # given deposit_id = self.create_deposit_with_status( status=_status, swh_id=_swh_id, swh_id_context=_swh_id_context, swh_anchor_id=_swh_anchor_id, swh_anchor_id_context=_swh_anchor_id_context ) url = reverse(STATE_IRI, args=[self.collection.name, deposit_id]) # when status_response = self.client.get(url) # then self.assertEqual(status_response.status_code, status.HTTP_200_OK) r = parse_xml(BytesIO(status_response.content)) self.assertEqual(int(r['deposit_id']), deposit_id) self.assertEqual(r['deposit_status'], _status) self.assertEqual(r['deposit_status_detail'], DEPOSIT_STATUS_DETAIL[DEPOSIT_STATUS_LOAD_SUCCESS]) self.assertEqual(r['deposit_swh_id'], _swh_id) self.assertEqual(r['deposit_swh_id_context'], _swh_id_context) self.assertEqual(r['deposit_swh_anchor_id'], _swh_anchor_id) self.assertEqual(r['deposit_swh_anchor_id_context'], _swh_anchor_id_context) def test_status_on_unknown_deposit(self): """Asking for the status of unknown deposit returns 404 response""" status_url = reverse(STATE_IRI, args=[self.collection.name, 999]) status_response = self.client.get(status_url) self.assertEqual(status_response.status_code, status.HTTP_404_NOT_FOUND) def test_status_with_http_accept_header_should_not_break(self): """Asking deposit status with Accept header should return 200 """ deposit_id = self.create_deposit_partial() status_url = reverse(STATE_IRI, args=[ self.collection.name, deposit_id]) response = self.client.get( status_url, HTTP_ACCEPT='text/html,application/xml;q=9,*/*,q=8') self.assertEqual(response.status_code, status.HTTP_200_OK) def test_status_on_deposit_rejected(self): _status = DEPOSIT_STATUS_REJECTED _swh_id = '548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10' _status_detail = {'url': {'summary': 'Wrong url'}} # given deposit_id = self.create_deposit_with_status( status=_status, swh_id=_swh_id, status_detail=_status_detail) url = reverse(STATE_IRI, args=[self.collection.name, deposit_id]) # when status_response = self.client.get(url) # then self.assertEqual(status_response.status_code, status.HTTP_200_OK) r = parse_xml(BytesIO(status_response.content)) self.assertEqual(int(r['deposit_id']), deposit_id) self.assertEqual(r['deposit_status'], _status) self.assertEqual(r['deposit_status_detail'], '- Wrong url') self.assertEqual(r['deposit_swh_id'], _swh_id) diff --git a/swh/deposit/tests/api/test_deposit_update.py b/swh/deposit/tests/api/test_deposit_update.py index 45935564..227c1a2d 100644 --- a/swh/deposit/tests/api/test_deposit_update.py +++ b/swh/deposit/tests/api/test_deposit_update.py @@ -1,333 +1,333 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from django.core.urlresolvers import reverse +from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.models import Deposit, DepositRequest from swh.deposit.config import EDIT_SE_IRI, EM_IRI from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine, create_arborescence_archive class DepositUpdateOrReplaceExistingDataTest( APITestCase, WithAuthTestCase, BasicTestCase, FileSystemCreationRoutine, CommonCreationRoutine): """Try put/post (update/replace) query on EM_IRI """ def setUp(self): super().setUp() self.atom_entry_data1 = b""" bar """ self.atom_entry_data1 = b""" bar """ self.archive2 = create_arborescence_archive( self.root_path, 'archive2', 'file2', b'some other content in file') def test_replace_archive_to_deposit_is_possible(self): """Replace all archive with another one should return a 204 response """ # given deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit = Deposit.objects.get(pk=deposit_id) requests = DepositRequest.objects.filter( deposit=deposit, - type=self.deposit_request_types['archive']) + type='archive') assert len(list(requests)) == 1 assert self.archive['name'] in requests[0].archive.name # we have no metadata for that deposit requests = list(DepositRequest.objects.filter( - deposit=deposit, type=self.deposit_request_types['metadata'])) + deposit=deposit, type='metadata')) assert len(requests) == 0 deposit_id = self._update_deposit_with_status(deposit_id, status_partial=True) requests = list(DepositRequest.objects.filter( - deposit=deposit, type=self.deposit_request_types['metadata'])) + deposit=deposit, type='metadata')) assert len(requests) == 1 update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) external_id = 'some-external-id-1' response = self.client.put( update_uri, content_type='application/zip', # as zip data=self.archive2['data'], # + headers CONTENT_LENGTH=self.archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive2['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive2['name'], )) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) requests = DepositRequest.objects.filter( deposit=deposit, - type=self.deposit_request_types['archive']) + type='archive') self.assertEqual(len(list(requests)), 1) self.assertRegex(requests[0].archive.name, self.archive2['name']) # check we did not touch the other parts requests = list(DepositRequest.objects.filter( - deposit=deposit, type=self.deposit_request_types['metadata'])) + deposit=deposit, type='metadata')) self.assertEqual(len(requests), 1) def test_replace_metadata_to_deposit_is_possible(self): """Replace all metadata with another one should return a 204 response """ # given deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit = Deposit.objects.get(pk=deposit_id) requests = DepositRequest.objects.filter( deposit=deposit, - type=self.deposit_request_types['metadata']) + type='metadata') assert len(list(requests)) == 0 requests = list(DepositRequest.objects.filter( - deposit=deposit, type=self.deposit_request_types['archive'])) + deposit=deposit, type='archive')) assert len(requests) == 1 update_uri = reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]) response = self.client.put( update_uri, content_type='application/atom+xml;type=entry', data=self.atom_entry_data1) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) requests = DepositRequest.objects.filter( deposit=deposit, - type=self.deposit_request_types['metadata']) + type='metadata') self.assertEqual(len(list(requests)), 1) metadata = requests[0].metadata self.assertEqual(metadata['foobar'], 'bar') # check we did not touch the other parts requests = list(DepositRequest.objects.filter( - deposit=deposit, type=self.deposit_request_types['archive'])) + deposit=deposit, type='archive')) self.assertEqual(len(requests), 1) def test_add_archive_to_deposit_is_possible(self): """Add another archive to a deposit return a 201 response """ # given deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit = Deposit.objects.get(pk=deposit_id) requests = DepositRequest.objects.filter( deposit=deposit, - type=self.deposit_request_types['archive']) + type='archive') assert len(list(requests)) == 1 assert self.archive['name'] in requests[0].archive.name requests = list(DepositRequest.objects.filter( - deposit=deposit, type=self.deposit_request_types['metadata'])) + deposit=deposit, type='metadata')) assert len(requests) == 0 update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) external_id = 'some-external-id-1' response = self.client.post( update_uri, content_type='application/zip', # as zip data=self.archive2['data'], # + headers CONTENT_LENGTH=self.archive2['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive2['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive2['name'],)) self.assertEqual(response.status_code, status.HTTP_201_CREATED) requests = list(DepositRequest.objects.filter( deposit=deposit, - type=self.deposit_request_types['archive']).order_by('id')) + type='archive').order_by('id')) self.assertEqual(len(requests), 2) # first archive still exists self.assertRegex(requests[0].archive.name, self.archive['name']) # a new one was added self.assertRegex(requests[1].archive.name, self.archive2['name']) # check we did not touch the other parts requests = list(DepositRequest.objects.filter( - deposit=deposit, type=self.deposit_request_types['metadata'])) + deposit=deposit, type='metadata')) self.assertEqual(len(requests), 0) def test_add_metadata_to_deposit_is_possible(self): """Add metadata with another one should return a 204 response """ # given deposit_id = self.create_deposit_partial() deposit = Deposit.objects.get(pk=deposit_id) requests = DepositRequest.objects.filter( deposit=deposit, - type=self.deposit_request_types['metadata']) + type='metadata') assert len(list(requests)) == 2 requests = list(DepositRequest.objects.filter( - deposit=deposit, type=self.deposit_request_types['archive'])) + deposit=deposit, type='archive')) assert len(requests) == 0 update_uri = reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]) response = self.client.post( update_uri, content_type='application/atom+xml;type=entry', data=self.atom_entry_data1) self.assertEqual(response.status_code, status.HTTP_201_CREATED) requests = DepositRequest.objects.filter( deposit=deposit, - type=self.deposit_request_types['metadata']).order_by('id') + type='metadata').order_by('id') self.assertEqual(len(list(requests)), 3) # a new one was added self.assertEqual(requests[1].metadata['foobar'], 'bar') # check we did not touch the other parts requests = list(DepositRequest.objects.filter( - deposit=deposit, type=self.deposit_request_types['archive'])) + deposit=deposit, type='archive')) self.assertEqual(len(requests), 0) class DepositUpdateFailuresTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine): """Failure scenario about add/replace (post/put) query on deposit. """ def test_add_metadata_to_unknown_collection(self): """Replacing metadata to unknown deposit should return a 404 response """ url = reverse(EDIT_SE_IRI, args=['test', 1000]) response = self.client.post( url, content_type='application/atom+xml;type=entry', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertRegex(response.content.decode('utf-8'), 'Unknown collection name test') def test_add_metadata_to_unknown_deposit(self): """Replacing metadata to unknown deposit should return a 404 response """ url = reverse(EDIT_SE_IRI, args=[self.collection.name, 999]) response = self.client.post( url, content_type='application/atom+xml;type=entry', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertRegex(response.content.decode('utf-8'), 'Deposit with id 999 does not exist') def test_replace_metadata_to_unknown_deposit(self): """Adding metadata to unknown deposit should return a 404 response """ url = reverse(EDIT_SE_IRI, args=[self.collection.name, 998]) response = self.client.put( url, content_type='application/atom+xml;type=entry', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertRegex(response.content.decode('utf-8'), 'Deposit with id 998 does not exist') def test_add_archive_to_unknown_deposit(self): """Adding metadata to unknown deposit should return a 404 response """ url = reverse(EM_IRI, args=[self.collection.name, 997]) response = self.client.post( url, content_type='application/zip', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertRegex(response.content.decode('utf-8'), 'Deposit with id 997 does not exist') def test_replace_archive_to_unknown_deposit(self): """Replacing archive to unknown deposit should return a 404 response """ url = reverse(EM_IRI, args=[self.collection.name, 996]) response = self.client.put( url, content_type='application/zip', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) self.assertRegex(response.content.decode('utf-8'), 'Deposit with id 996 does not exist') def test_post_metadata_to_em_iri_failure(self): """Update (POST) archive with wrong content type should return 400 """ deposit_id = self.create_deposit_partial() # only update on partial update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) response = self.client.post( update_uri, content_type='application/x-gtar-compressed', data=self.atom_entry_data0) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertRegex(response.content.decode('utf-8'), 'Packaging format supported is restricted to ' 'application/zip, application/x-tar') def test_put_metadata_to_em_iri_failure(self): """Update (PUT) archive with wrong content type should return 400 """ # given deposit_id = self.create_deposit_partial() # only update on partial # when update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id]) response = self.client.put( update_uri, content_type='application/atom+xml;type=entry', data=self.atom_entry_data0) # then self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertRegex(response.content.decode('utf-8'), 'Packaging format supported is restricted to ' 'application/zip, application/x-tar') diff --git a/swh/deposit/tests/api/test_deposit_update_status.py b/swh/deposit/tests/api/test_deposit_update_status.py index 1ec4dbdb..d338ceab 100644 --- a/swh/deposit/tests/api/test_deposit_update_status.py +++ b/swh/deposit/tests/api/test_deposit_update_status.py @@ -1,130 +1,130 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json -from django.core.urlresolvers import reverse +from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.models import Deposit, DEPOSIT_STATUS_DETAIL from swh.deposit.config import PRIVATE_PUT_DEPOSIT, DEPOSIT_STATUS_VERIFIED from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS from ..common import BasicTestCase class UpdateDepositStatusTest(APITestCase, BasicTestCase): """Update the deposit's status scenario """ def setUp(self): super().setUp() deposit = Deposit(status=DEPOSIT_STATUS_VERIFIED, collection=self.collection, client=self.user) deposit.save() self.deposit = Deposit.objects.get(pk=deposit.id) assert self.deposit.status == DEPOSIT_STATUS_VERIFIED def test_update_deposit_status(self): """Existing status for update should return a 204 response """ url = reverse(PRIVATE_PUT_DEPOSIT, args=[self.collection.name, self.deposit.id]) possible_status = set(DEPOSIT_STATUS_DETAIL.keys()) - set( [DEPOSIT_STATUS_LOAD_SUCCESS]) for _status in possible_status: response = self.client.put( url, content_type='application/json', data=json.dumps({'status': _status})) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) deposit = Deposit.objects.get(pk=self.deposit.id) self.assertEqual(deposit.status, _status) def test_update_deposit_status_with_info(self): """Existing status for update with info should return a 204 response """ url = reverse(PRIVATE_PUT_DEPOSIT, args=[self.collection.name, self.deposit.id]) expected_status = DEPOSIT_STATUS_LOAD_SUCCESS origin_url = 'something' directory_id = '42a13fc721c8716ff695d0d62fc851d641f3a12b' revision_id = '47dc6b4636c7f6cba0df83e3d5490bf4334d987e' expected_swh_id = 'swh:1:dir:%s' % directory_id expected_swh_id_context = 'swh:1:dir:%s;origin=%s' % ( directory_id, origin_url) expected_swh_anchor_id = 'swh:1:rev:%s' % revision_id expected_swh_anchor_id_context = 'swh:1:rev:%s;origin=%s' % ( revision_id, origin_url) response = self.client.put( url, content_type='application/json', data=json.dumps({ 'status': expected_status, 'revision_id': revision_id, 'directory_id': directory_id, 'origin_url': origin_url, })) self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) deposit = Deposit.objects.get(pk=self.deposit.id) self.assertEqual(deposit.status, expected_status) self.assertEqual(deposit.swh_id, expected_swh_id) self.assertEqual(deposit.swh_id_context, expected_swh_id_context) self.assertEqual(deposit.swh_anchor_id, expected_swh_anchor_id) self.assertEqual(deposit.swh_anchor_id_context, expected_swh_anchor_id_context) def test_update_deposit_status_will_fail_with_unknown_status(self): """Unknown status for update should return a 400 response """ url = reverse(PRIVATE_PUT_DEPOSIT, args=[self.collection.name, self.deposit.id]) response = self.client.put( url, content_type='application/json', data=json.dumps({'status': 'unknown'})) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) def test_update_deposit_status_will_fail_with_no_status_key(self): """No status provided for update should return a 400 response """ url = reverse(PRIVATE_PUT_DEPOSIT, args=[self.collection.name, self.deposit.id]) response = self.client.put( url, content_type='application/json', data=json.dumps({'something': 'something'})) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) def test_update_deposit_status_success_without_swh_id_fail(self): """Providing successful status without swh_id should return a 400 """ url = reverse(PRIVATE_PUT_DEPOSIT, args=[self.collection.name, self.deposit.id]) response = self.client.put( url, content_type='application/json', data=json.dumps({'status': DEPOSIT_STATUS_LOAD_SUCCESS})) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) diff --git a/swh/deposit/tests/api/test_service_document.py b/swh/deposit/tests/api/test_service_document.py index 3afa1ca5..61d8e074 100644 --- a/swh/deposit/tests/api/test_service_document.py +++ b/swh/deposit/tests/api/test_service_document.py @@ -1,102 +1,102 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from django.core.urlresolvers import reverse +from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.tests import TEST_CONFIG from swh.deposit.config import SD_IRI from ..common import BasicTestCase, WithAuthTestCase class ServiceDocumentNoAuthCase(APITestCase, BasicTestCase): """Service document endpoints are protected with basic authentication. """ def test_service_document_no_authentication_fails(self): """Without authentication, service document endpoint should return 401 """ url = reverse(SD_IRI) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) def test_service_document_with_http_accept_should_not_break(self): """Without auth, sd endpoint through browser should return 401 """ url = reverse(SD_IRI) # when response = self.client.get( url, HTTP_ACCEPT='text/html,application/xml;q=9,*/*,q=8') self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) class ServiceDocumentCase(APITestCase, WithAuthTestCase, BasicTestCase): def assertResponseOk(self, response): # noqa: N802 self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.content.decode('utf-8'), ''' 2.0 %s The Software Heritage (SWH) Archive %s Software Collection application/zip application/x-tar Collection Policy Software Heritage Archive Collect, Preserve, Share false false http://purl.org/net/sword/package/SimpleZip http://testserver/1/%s/ %s ''' % (TEST_CONFIG['max_upload_size'], self.username, self.username, self.username, self.username)) # noqa def test_service_document(self): """With authentication, service document list user's collection """ url = reverse(SD_IRI) # when response = self.client.get(url) # then self.assertResponseOk(response) def test_service_document_with_http_accept_header(self): """With authentication, with browser, sd list user's collection """ url = reverse(SD_IRI) # when response = self.client.get( url, HTTP_ACCEPT='text/html,application/xml;q=9,*/*,q=8') self.assertResponseOk(response) diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py index d7d9de20..0d298477 100644 --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -1,573 +1,568 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 import hashlib import os import shutil import tarfile import tempfile -from django.core.urlresolvers import reverse +from django.urls import reverse from django.test import TestCase from io import BytesIO import pytest from rest_framework import status from swh.deposit.config import (COL_IRI, EM_IRI, EDIT_SE_IRI, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_DEPOSITED) from swh.deposit.models import DepositClient, DepositCollection, Deposit from swh.deposit.models import DepositRequest -from swh.deposit.models import DepositRequestType from swh.deposit.parsers import parse_xml from swh.deposit.settings.testing import MEDIA_ROOT from swh.core import tarball def compute_info(archive_path): """Given a path, compute information on path. """ with open(archive_path, 'rb') as f: length = 0 sha1sum = hashlib.sha1() md5sum = hashlib.md5() data = b'' for chunk in f: sha1sum.update(chunk) md5sum.update(chunk) length += len(chunk) data += chunk return { 'dir': os.path.dirname(archive_path), 'name': os.path.basename(archive_path), 'path': archive_path, 'length': length, 'sha1sum': sha1sum.hexdigest(), 'md5sum': md5sum.hexdigest(), 'data': data } def _compress(path, extension, dir_path): """Compress path according to extension """ if extension == 'zip' or extension == 'tar': return tarball.compress(path, extension, dir_path) elif '.' in extension: split_ext = extension.split('.') if split_ext[0] != 'tar': raise ValueError( 'Development error, only zip or tar archive supported, ' '%s not supported' % extension) # deal with specific tar mode = split_ext[1] supported_mode = ['xz', 'gz', 'bz2'] if mode not in supported_mode: raise ValueError( 'Development error, only %s supported, %s not supported' % ( supported_mode, mode)) files = tarball._ls(dir_path) with tarfile.open(path, 'w:%s' % mode) as t: for fpath, fname in files: t.add(fpath, arcname=fname, recursive=False) return path def create_arborescence_archive(root_path, archive_name, filename, content, up_to_size=None, extension='zip'): """Build an archive named archive_name in the root_path. This archive contains one file named filename with the content content. Args: root_path (str): Location path of the archive to create archive_name (str): Archive's name (without extension) filename (str): Archive's content is only one filename content (bytes): Content of the filename up_to_size (int | None): Fill in the blanks size to oversize or complete an archive's size extension (str): Extension of the archive to write (default is zip) Returns: dict with the keys: - dir: the directory of that archive - path: full path to the archive - sha1sum: archive's sha1sum - length: archive's length """ os.makedirs(root_path, exist_ok=True) archive_path_dir = tempfile.mkdtemp(dir=root_path) dir_path = os.path.join(archive_path_dir, archive_name) os.mkdir(dir_path) filepath = os.path.join(dir_path, filename) _length = len(content) count = 0 batch_size = 128 with open(filepath, 'wb') as f: f.write(content) if up_to_size: # fill with blank content up to a given size count += _length while count < up_to_size: f.write(b'0'*batch_size) count += batch_size _path = '%s.%s' % (dir_path, extension) _path = _compress(_path, extension, dir_path) return compute_info(_path) def create_archive_with_archive(root_path, name, archive): """Create an archive holding another. """ invalid_archive_path = os.path.join(root_path, name) with tarfile.open(invalid_archive_path, 'w:gz') as _archive: _archive.add(archive['path'], arcname=archive['name']) return compute_info(invalid_archive_path) @pytest.mark.fs class FileSystemCreationRoutine(TestCase): """Mixin intended for tests needed to tamper with archives. """ def setUp(self): """Define the test client and other test variables.""" super().setUp() self.root_path = '/tmp/swh-deposit/test/build-zip/' os.makedirs(self.root_path, exist_ok=True) self.archive = create_arborescence_archive( self.root_path, 'archive1', 'file1', b'some content in file') self.atom_entry = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author https://hal-test.archives-ouvertes.fr """ def tearDown(self): super().tearDown() shutil.rmtree(self.root_path) def create_simple_binary_deposit(self, status_partial=True): response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/zip', data=self.archive['data'], CONTENT_LENGTH=self.archive['length'], HTTP_MD5SUM=self.archive['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive['name'], )) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) _status = response_content['deposit_status'] if status_partial: expected_status = DEPOSIT_STATUS_PARTIAL else: expected_status = DEPOSIT_STATUS_VERIFIED self.assertEqual(_status, expected_status) deposit_id = int(response_content['deposit_id']) return deposit_id def create_complex_binary_deposit(self, status_partial=False): deposit_id = self.create_simple_binary_deposit( status_partial=True) # Add a second archive to the deposit # update its status to DEPOSIT_STATUS_VERIFIED response = self.client.post( reverse(EM_IRI, args=[self.collection.name, deposit_id]), content_type='application/zip', data=self.archive2['data'], CONTENT_LENGTH=self.archive2['length'], HTTP_MD5SUM=self.archive2['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_DISPOSITION='attachment; filename=filename1.zip') # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = int(response_content['deposit_id']) return deposit_id def create_deposit_archive_with_archive(self, archive_extension): # we create the holding archive to a given extension archive = create_arborescence_archive( self.root_path, 'archive1', 'file1', b'some content in file', extension=archive_extension) # now we create an archive holding the first created archive invalid_archive = create_archive_with_archive( self.root_path, 'invalid.tar.gz', archive) # we deposit it response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/x-tar', data=invalid_archive['data'], CONTENT_LENGTH=invalid_archive['length'], HTTP_MD5SUM=invalid_archive['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=False, HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( invalid_archive['name'], )) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) _status = response_content['deposit_status'] self.assertEqual(_status, DEPOSIT_STATUS_DEPOSITED) deposit_id = int(response_content['deposit_id']) return deposit_id def update_binary_deposit(self, deposit_id, status_partial=False): # update existing deposit with atom entry metadata response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.codemeta_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) _status = response_content['deposit_status'] if status_partial: expected_status = DEPOSIT_STATUS_PARTIAL else: expected_status = DEPOSIT_STATUS_DEPOSITED self.assertEqual(_status, expected_status) deposit_id = int(response_content['deposit_id']) return deposit_id @pytest.mark.fs class BasicTestCase(TestCase): """Mixin intended for data setup purposes (user, collection, etc...) """ def setUp(self): """Define the test client and other test variables.""" super().setUp() # expanding diffs in tests self.maxDiff = None # basic minimum test data - deposit_request_types = {} - # Add deposit request types - for deposit_request_type in ['archive', 'metadata']: - drt = DepositRequestType(name=deposit_request_type) - drt.save() - deposit_request_types[deposit_request_type] = drt _name = 'hal' _provider_url = 'https://hal-test.archives-ouvertes.fr/' _domain = 'archives-ouvertes.fr/' # set collection up _collection = DepositCollection(name=_name) _collection.save() # set user/client up _client = DepositClient.objects.create_user(username=_name, password=_name, provider_url=_provider_url, domain=_domain) _client.collections = [_collection.id] _client.last_name = _name _client.save() self.collection = _collection self.user = _client self.username = _name self.userpass = _name - self.deposit_request_types = deposit_request_types - def tearDown(self): super().tearDown() # Clean up uploaded files in temporary directory (tests have # their own media root folder) if os.path.exists(MEDIA_ROOT): for d in os.listdir(MEDIA_ROOT): shutil.rmtree(os.path.join(MEDIA_ROOT, d)) class WithAuthTestCase(TestCase): """Mixin intended for testing the api with basic authentication. """ def setUp(self): super().setUp() _token = '%s:%s' % (self.username, self.userpass) token = base64.b64encode(_token.encode('utf-8')) authorization = 'Basic %s' % token.decode('utf-8') self.client.credentials(HTTP_AUTHORIZATION=authorization) def tearDown(self): super().tearDown() self.client.credentials() class CommonCreationRoutine(TestCase): """Mixin class to share initialization routine. cf: `class`:test_deposit_update.DepositReplaceExistingDataTest `class`:test_deposit_update.DepositUpdateDepositWithNewDataTest `class`:test_deposit_update.DepositUpdateFailuresTest `class`:test_deposit_delete.DepositDeleteTest """ def setUp(self): super().setUp() self.atom_entry_data0 = b""" some-external-id https://hal-test.archives-ouvertes.fr/some-external-id some awesome author """ self.atom_entry_data1 = b""" another one no one + 2017-10-07T15:17:08Z """ self.atom_entry_data2 = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author https://hal-test.archives-ouvertes.fr/id """ self.codemeta_entry_data0 = b""" Awesome Compiler https://hal-test.archives-ouvertes.fr/1785io25c695 urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author description key-word 1 """ self.codemeta_entry_data1 = b""" Composing a Web of Audio Applications hal hal-01243065 hal-01243065 https://hal-test.archives-ouvertes.fr/hal-01243065 test DSP programming,Web 2017-05-03T16:08:47+02:00 this is the description 1 phpstorm stable php python C GNU General Public License v3.0 only CeCILL Free Software License Agreement v1.1 HAL hal@ccsd.cnrs.fr Morane Gruenpeter """ def create_deposit_with_invalid_archive(self, external_id='some-external-id-1'): url = reverse(COL_IRI, args=[self.collection.name]) data = b'some data which is clearly not a zip file' md5sum = hashlib.md5(data).hexdigest() # when response = self.client.post( url, content_type='application/zip', # as zip data=data, # + headers CONTENT_LENGTH=len(data), # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, HTTP_CONTENT_MD5=md5sum, HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') response_content = parse_xml(BytesIO(response.content)) deposit_id = int(response_content['deposit_id']) return deposit_id def create_deposit_with_status( self, status, external_id='some-external-id-1', swh_id=None, swh_id_context=None, swh_anchor_id=None, swh_anchor_id_context=None, status_detail=None): # create an invalid deposit which we will update further down the line deposit_id = self.create_deposit_with_invalid_archive(external_id) # We cannot create some form of deposit with a given status in # test context ('rejected' for example). Update in place the # deposit with such status to permit some further tests. deposit = Deposit.objects.get(pk=deposit_id) if status == DEPOSIT_STATUS_REJECTED: deposit.status_detail = status_detail deposit.status = status if swh_id: deposit.swh_id = swh_id if swh_id_context: deposit.swh_id_context = swh_id_context if swh_anchor_id: deposit.swh_anchor_id = swh_anchor_id if swh_anchor_id_context: deposit.swh_anchor_id_context = swh_anchor_id_context deposit.save() return deposit_id def create_simple_deposit_partial(self, external_id='some-external-id'): """Create a simple deposit (1 request) in `partial` state and returns its new identifier. Returns: deposit id """ response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data0, HTTP_SLUG=external_id, HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = int(response_content['deposit_id']) return deposit_id def create_deposit_partial_with_data_in_args(self, data): """Create a simple deposit (1 request) in `partial` state with the data or metadata as an argument and returns its new identifier. Args: data: atom entry Returns: deposit id """ + if isinstance(data, str): + data = data.encode('utf-8') + response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=data, HTTP_SLUG='external-id', HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = int(response_content['deposit_id']) return deposit_id def _update_deposit_with_status(self, deposit_id, status_partial=False): """Add to a given deposit another archive and update its current status to `deposited` (by default). Returns: deposit id """ # when response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) # then assert response.status_code == status.HTTP_201_CREATED return deposit_id def create_deposit_ready(self, external_id='some-external-id'): """Create a complex deposit (2 requests) in status `deposited`. """ deposit_id = self.create_simple_deposit_partial( external_id=external_id) deposit_id = self._update_deposit_with_status(deposit_id) return deposit_id def create_deposit_partial(self, external_id='some-external-id'): """Create a complex deposit (2 requests) in status `partial`. """ deposit_id = self.create_simple_deposit_partial( external_id=external_id) deposit_id = self._update_deposit_with_status( deposit_id, status_partial=True) return deposit_id def add_metadata_to_deposit(self, deposit_id, status_partial=False): """Add metadata to deposit. """ # when response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.codemeta_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) assert response.status_code == status.HTTP_201_CREATED # then deposit = Deposit.objects.get(pk=deposit_id) assert deposit is not None deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert deposit_requests is not [] for dr in deposit_requests: - if dr.type.name == 'metadata': + if dr.type == 'metadata': assert deposit_requests[0].metadata is not {} return deposit_id diff --git a/swh/deposit/tests/loader/test_checker.py b/swh/deposit/tests/loader/test_checker.py index d1721d64..6b45b4c2 100644 --- a/swh/deposit/tests/loader/test_checker.py +++ b/swh/deposit/tests/loader/test_checker.py @@ -1,68 +1,68 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework.test import APITestCase from swh.deposit.models import Deposit from swh.deposit.config import PRIVATE_CHECK_DEPOSIT, DEPOSIT_STATUS_VERIFIED from swh.deposit.config import DEPOSIT_STATUS_REJECTED from swh.deposit.loader.checker import DepositChecker -from django.core.urlresolvers import reverse +from django.urls import reverse from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine class DepositCheckerScenarioTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine): def setUp(self): super().setUp() # 2. Sets a basic client which accesses the test data checker_client = SWHDepositTestClient(client=self.client, config=CLIENT_TEST_CONFIG) # 3. setup loader with no persistence and that client self.checker = DepositChecker(client=checker_client) def test_check_deposit_ready(self): """Check on a valid 'deposited' deposit should result in 'verified' """ # 1. create a deposit with archive and metadata deposit_id = self.create_simple_binary_deposit() deposit_id = self.update_binary_deposit(deposit_id, status_partial=False) args = [self.collection.name, deposit_id] deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args) # when actual_result = self.checker.check(deposit_check_url=deposit_check_url) # then deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_VERIFIED) self.assertEqual(actual_result, {'status': 'eventful'}) def test_check_deposit_rejected(self): """Check on invalid 'deposited' deposit should result in 'rejected' """ # 1. create a deposit with archive and metadata deposit_id = self.create_deposit_with_invalid_archive() args = [self.collection.name, deposit_id] deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args) # when actual_result = self.checker.check(deposit_check_url=deposit_check_url) # then deposit = Deposit.objects.get(pk=deposit_id) self.assertEqual(deposit.status, DEPOSIT_STATUS_REJECTED) self.assertEqual(actual_result, {'status': 'eventful'}) diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py index e8cea274..323ba137 100644 --- a/swh/deposit/tests/loader/test_loader.py +++ b/swh/deposit/tests/loader/test_loader.py @@ -1,169 +1,172 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import unittest import shutil import pytest from rest_framework.test import APITestCase from swh.model import hashutil from swh.deposit.models import Deposit from swh.deposit.loader import loader from swh.deposit.config import ( PRIVATE_GET_RAW_CONTENT, PRIVATE_GET_DEPOSIT_METADATA, PRIVATE_PUT_DEPOSIT ) -from django.core.urlresolvers import reverse +from django.urls import reverse from swh.loader.core.tests import BaseLoaderStorageTest +from swh.deposit import utils from .common import SWHDepositTestClient, CLIENT_TEST_CONFIG from .. import TEST_LOADER_CONFIG from ..common import (BasicTestCase, WithAuthTestCase, CommonCreationRoutine, FileSystemCreationRoutine) class TestLoaderUtils(unittest.TestCase): def assertRevisionsOk(self, expected_revisions): # noqa: N802 """Check the loader's revisions match the expected revisions. Expects self.loader to be instantiated and ready to be inspected (meaning the loading took place). Args: expected_revisions (dict): Dict with key revision id, value the targeted directory id. """ # The last revision being the one used later to start back from for rev in self.loader.state['revision']: rev_id = hashutil.hash_to_hex(rev['id']) directory_id = hashutil.hash_to_hex(rev['directory']) self.assertEqual(expected_revisions[rev_id], directory_id) @pytest.mark.fs class DepositLoaderScenarioTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine, TestLoaderUtils, BaseLoaderStorageTest): def setUp(self): super().setUp() # create the extraction dir used by the loader os.makedirs(TEST_LOADER_CONFIG['extraction_dir'], exist_ok=True) - # 1. create a deposit with archive and metadata - self.deposit_id = self.create_simple_binary_deposit() - # 2. Sets a basic client which accesses the test data + # Sets a basic client which accesses the test data loader_client = SWHDepositTestClient(self.client, config=CLIENT_TEST_CONFIG) - # 3. setup loader with that client + # Setup loader with that client self.loader = loader.DepositLoader(client=loader_client) self.storage = self.loader.storage def tearDown(self): super().tearDown() shutil.rmtree(TEST_LOADER_CONFIG['extraction_dir']) def test_inject_deposit_ready(self): """Load a deposit which is ready """ - args = [self.collection.name, self.deposit_id] + # create a deposit with archive and metadata + deposit_id = self.create_simple_binary_deposit() + self.update_binary_deposit(deposit_id, status_partial=False) + + args = [self.collection.name, deposit_id] archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) # when res = self.loader.load(archive_url=archive_url, deposit_meta_url=deposit_meta_url, deposit_update_url=deposit_update_url) # then self.assertEqual(res['status'], 'eventful', res) self.assertCountContents(1) self.assertCountDirectories(1) self.assertCountRevisions(1) self.assertCountReleases(0) self.assertCountSnapshots(1) def test_inject_deposit_verify_metadata(self): """Load a deposit with metadata, test metadata integrity """ - self.deposit_metadata_id = self.add_metadata_to_deposit( - self.deposit_id) - args = [self.collection.name, self.deposit_metadata_id] + deposit_id = self.create_simple_binary_deposit() + self.add_metadata_to_deposit(deposit_id, status_partial=False) + args = [self.collection.name, deposit_id] archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args) # when self.loader.load(archive_url=archive_url, deposit_meta_url=deposit_meta_url, deposit_update_url=deposit_update_url) # then self.assertCountContents(1) self.assertCountDirectories(1) self.assertCountRevisions(1) self.assertCountReleases(0) self.assertCountSnapshots(1) codemeta = 'codemeta:' - origin_url = 'https://hal-test.archives-ouvertes.fr/hal-01243065' + deposit = Deposit.objects.get(pk=deposit_id) + origin_url = utils.origin_url_from(deposit) + expected_origin_metadata = { '@xmlns': 'http://www.w3.org/2005/Atom', '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', 'author': { 'email': 'hal@ccsd.cnrs.fr', 'name': 'HAL' }, - codemeta + 'url': origin_url, + codemeta + 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # same as xml # noqa codemeta + 'runtimePlatform': 'phpstorm', codemeta + 'license': [ { codemeta + 'name': 'GNU General Public License v3.0 only' }, { codemeta + 'name': 'CeCILL Free Software License Agreement v1.1' # noqa } ], codemeta + 'author': { codemeta + 'name': 'Morane Gruenpeter' }, codemeta + 'programmingLanguage': ['php', 'python', 'C'], codemeta + 'applicationCategory': 'test', codemeta + 'dateCreated': '2017-05-03T16:08:47+02:00', codemeta + 'version': '1', 'external_identifier': 'hal-01243065', 'title': 'Composing a Web of Audio Applications', codemeta + 'description': 'this is the description', 'id': 'hal-01243065', 'client': 'hal', codemeta + 'keywords': 'DSP programming,Web', codemeta + 'developmentStatus': 'stable' } self.assertOriginMetadataContains('deposit', origin_url, expected_origin_metadata) - deposit = Deposit.objects.get(pk=self.deposit_id) - self.assertRegex(deposit.swh_id, r'^swh:1:dir:.*') self.assertEqual(deposit.swh_id_context, '%s;origin=%s' % ( deposit.swh_id, origin_url )) self.assertRegex(deposit.swh_anchor_id, r'^swh:1:rev:.*') self.assertEqual(deposit.swh_anchor_id_context, '%s;origin=%s' % ( deposit.swh_anchor_id, origin_url )) diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py index aa3ecfa7..27e06047 100644 --- a/swh/deposit/tests/test_utils.py +++ b/swh/deposit/tests/test_utils.py @@ -1,132 +1,216 @@ -# Copyright (C) 2018 The Software Heritage developers +# Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest +import pytest +from unittest.mock import patch from swh.deposit import utils +from swh.deposit.models import Deposit, DepositClient + + +def test_origin_url_from(): + """With correctly setup-ed deposit, all is fine + + """ + for provider_url, external_id in ( + ('http://somewhere.org', 'uuid'), + ('http://overthejungle.org', 'diuu'), + ): + deposit = Deposit( + client=DepositClient(provider_url=provider_url), + external_id=external_id + ) + + actual_origin_url = utils.origin_url_from(deposit) + + assert actual_origin_url == '%s/%s' % ( + provider_url.rstrip('/'), external_id) + + +def test_origin_url_from_ko(): + """Badly configured deposit should raise + + """ + for provider_url, external_id in ( + (None, 'uuid'), + ('http://overthejungle.org', None), + ): + deposit = Deposit( + client=DepositClient(provider_url=provider_url), + external_id=None + ) + + with pytest.raises(AssertionError): + utils.origin_url_from(deposit) class UtilsTestCase(unittest.TestCase): """Utils library """ def test_merge(self): """Calling utils.merge on dicts should merge without losing information """ d0 = { 'author': 'someone', 'license': [['gpl2']], 'a': 1 } d1 = { 'author': ['author0', {'name': 'author1'}], 'license': [['gpl3']], 'b': { '1': '2' } } d2 = { 'author': map(lambda x: x, ['else']), 'license': 'mit', 'b': { '2': '3', } } d3 = { 'author': (v for v in ['no one']), } actual_merge = utils.merge(d0, d1, d2, d3) expected_merge = { 'a': 1, 'license': [['gpl2'], ['gpl3'], 'mit'], 'author': [ 'someone', 'author0', {'name': 'author1'}, 'else', 'no one'], 'b': { '1': '2', '2': '3', } } self.assertEqual(actual_merge, expected_merge) def test_merge_2(self): d0 = { 'license': 'gpl2', 'runtime': { 'os': 'unix derivative' } } d1 = { 'license': 'gpl3', 'runtime': 'GNU/Linux' } expected = { 'license': ['gpl2', 'gpl3'], 'runtime': [ { 'os': 'unix derivative' }, 'GNU/Linux' ], } actual = utils.merge(d0, d1) self.assertEqual(actual, expected) def test_merge_edge_cases(self): input_dict = { 'license': ['gpl2', 'gpl3'], 'runtime': [ { 'os': 'unix derivative' }, 'GNU/Linux' ], } # against empty dict actual = utils.merge(input_dict, {}) self.assertEqual(actual, input_dict) # against oneself actual = utils.merge(input_dict, input_dict, input_dict) self.assertEqual(input_dict, input_dict) def test_merge_one_dict(self): """Merge one dict should result in the same dict value """ input_and_expected = {'anything': 'really'} actual = utils.merge(input_and_expected) self.assertEqual(actual, input_and_expected) def test_merge_raise(self): """Calling utils.merge with any no dict argument should raise """ d0 = { 'author': 'someone', 'a': 1 } d1 = ['not a dict'] with self.assertRaises(ValueError): utils.merge(d0, d1) with self.assertRaises(ValueError): utils.merge(d1, d0) with self.assertRaises(ValueError): utils.merge(d1) self.assertEqual(utils.merge(d0), d0) + + +@patch('swh.deposit.utils.normalize_timestamp', side_effect=lambda x: x) +def test_normalize_date_0(mock_normalize): + """When date is a list, choose the first date and normalize it + + Note: We do not test swh.model.identifiers which is already tested + in swh.model + + """ + actual_date = utils.normalize_date(['2017-10-12', 'date1']) + + expected_date = '2017-10-12 00:00:00+00:00' + + assert str(actual_date) == expected_date + + +@patch('swh.deposit.utils.normalize_timestamp', side_effect=lambda x: x) +def test_normalize_date_1(mock_normalize): + """Providing a date in a reasonable format, everything is fine + + Note: We do not test swh.model.identifiers which is already tested + in swh.model + + """ + actual_date = utils.normalize_date('2018-06-11 17:02:02') + + expected_date = '2018-06-11 17:02:02+00:00' + + assert str(actual_date) == expected_date + + +@patch('swh.deposit.utils.normalize_timestamp', side_effect=lambda x: x) +def test_normalize_date_doing_irrelevant_stuff(mock_normalize): + """Providing a date with only the year results in a reasonable date + + Note: We do not test swh.model.identifiers which is already tested + in swh.model + + """ + actual_date = utils.normalize_date('2017') + + expected_date = '2017-01-01 00:00:00+00:00' + + assert str(actual_date) == expected_date diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py index 7979ec5b..beb31ef6 100644 --- a/swh/deposit/utils.py +++ b/swh/deposit/utils.py @@ -1,55 +1,108 @@ -# Copyright (C) 2018 The Software Heritage developers +# Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import iso8601 + from types import GeneratorType +from swh.model.identifiers import normalize_timestamp + + +def origin_url_from(deposit): + """Given a deposit instance, return the associated origin url. + + This expects a deposit and the associated client to be correctly + configured. + + Args: + deposit (Deposit): The deposit from which derives the origin url + + Raises: + AssertionError if: + - the client's provider_url field is not configured. + - the deposit's external_id field is not configured. + + Returns + The associated origin url + + """ + external_id = deposit.external_id + assert external_id is not None + base_url = deposit.client.provider_url + assert base_url is not None + return '%s/%s' % (base_url.rstrip('/'), external_id) + def merge(*dicts): """Given an iterator of dicts, merge them losing no information. Args: *dicts: arguments are all supposed to be dict to merge into one Returns: dict merged without losing information """ def _extend(existing_val, value): """Given an existing value and a value (as potential lists), merge them together without repetition. """ if isinstance(value, (list, map, GeneratorType)): vals = value else: vals = [value] for v in vals: if v in existing_val: continue existing_val.append(v) return existing_val d = {} for data in dicts: if not isinstance(data, dict): raise ValueError( 'dicts is supposed to be a variable arguments of dict') for key, value in data.items(): existing_val = d.get(key) if not existing_val: d[key] = value continue if isinstance(existing_val, (list, map, GeneratorType)): new_val = _extend(existing_val, value) elif isinstance(existing_val, dict): if isinstance(value, dict): new_val = merge(existing_val, value) else: new_val = _extend([existing_val], value) else: new_val = _extend([existing_val], value) d[key] = new_val return d + + +def normalize_date(date): + """Normalize date fields as expected by swh workers. + + If date is a list, elect arbitrarily the first element of that + list + + If date is (then) a string, parse it through + dateutil.parser.parse to extract a datetime. + + Then normalize it through + swh.model.identifiers.normalize_timestamp. + + Returns + The swh date object + + """ + if isinstance(date, list): + date = date[0] + if isinstance(date, str): + date = iso8601.parse_date(date) + + return normalize_timestamp(date) diff --git a/version.txt b/version.txt index 606ff0c4..f4fa03ef 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.67-0-g822ab1d \ No newline at end of file +v0.0.68-0-gd075c56 \ No newline at end of file