diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 73159e53..73a7c98f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,46 +1,40 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.4.0 hooks: - id: trailing-whitespace - id: flake8 - id: check-json - id: check-yaml - repo: https://github.com/codespell-project/codespell rev: v1.16.0 hooks: - id: codespell - repo: local hooks: - id: mypy name: mypy entry: env DJANGO_SETTINGS_MODULE=swh.deposit.settings.testing mypy args: [swh] pass_filenames: false language: system types: [python] +- repo: https://github.com/python/black + rev: 19.10b0 + hooks: + - id: black + # unfortunately, we are far from being able to enable this... # - repo: https://github.com/PyCQA/pydocstyle.git # rev: 4.0.0 # hooks: # - id: pydocstyle # name: pydocstyle # description: pydocstyle is a static analysis tool for checking compliance with Python docstring conventions. # entry: pydocstyle --convention=google # language: python # types: [python] -# black requires py3.6+ -#- repo: https://github.com/python/black -# rev: 19.3b0 -# hooks: -# - id: black -# language_version: python3 -#- repo: https://github.com/asottile/blacken-docs -# rev: v1.0.0-1 -# hooks: -# - id: blacken-docs -# additional_dependencies: [black==19.3b0] diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..8d79b7e6 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,6 @@ +[flake8] +# E203: whitespaces before ':' +# E231: missing whitespace after ',' +# W503: line break before binary operator +ignore = E203,E231,W503 +max-line-length = 88 diff --git a/setup.py b/setup.py index 7d834de8..2ca6eadb 100755 --- a/setup.py +++ b/setup.py @@ -1,76 +1,77 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from setuptools import setup, find_packages from os import path from io import open here = path.abspath(path.dirname(__file__)) # Get the long description from the README file -with open(path.join(here, 'README.md'), encoding='utf-8') as f: +with open(path.join(here, "README.md"), encoding="utf-8") as f: long_description = f.read() def parse_requirements(*names): requirements = [] for name in names: if name: - reqf = 'requirements-%s.txt' % name + reqf = "requirements-%s.txt" % name else: - reqf = 'requirements.txt' + reqf = "requirements.txt" if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() - if not line or line.startswith('#'): + if not line or line.startswith("#"): continue requirements.append(line) return requirements setup( - name='swh.deposit', - description='Software Heritage Deposit Server', + name="swh.deposit", + description="Software Heritage Deposit Server", long_description=long_description, - long_description_content_type='text/markdown', - author='Software Heritage developers', - author_email='swh-devel@inria.fr', - url='https://forge.softwareheritage.org/source/swh-deposit/', + long_description_content_type="text/markdown", + author="Software Heritage developers", + author_email="swh-devel@inria.fr", + url="https://forge.softwareheritage.org/source/swh-deposit/", packages=find_packages(), - install_requires=parse_requirements(None, 'swh'), - tests_require=parse_requirements('test'), - setup_requires=['vcversioner'], + install_requires=parse_requirements(None, "swh"), + tests_require=parse_requirements("test"), + setup_requires=["vcversioner"], extras_require={ - 'testing': parse_requirements('test', 'server', 'swh-server'), - 'server': parse_requirements('server', 'swh-server')}, + "testing": parse_requirements("test", "server", "swh-server"), + "server": parse_requirements("server", "swh-server"), + }, vcversioner={}, include_package_data=True, - entry_points=''' + entry_points=""" [console_scripts] swh-deposit=swh.deposit.cli:main [swh.cli.subcommands] deposit=swh.deposit.cli:deposit [swh.workers] deposit.worker=swh.deposit.loader:register - ''', + """, classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 5 - Production/Stable", ], project_urls={ - 'Bug Reports': 'https://forge.softwareheritage.org/maniphest', - 'Funding': 'https://www.softwareheritage.org/donate', - 'Source': 'https://forge.softwareheritage.org/source/swh-deposit', + "Bug Reports": "https://forge.softwareheritage.org/maniphest", + "Funding": "https://www.softwareheritage.org/donate", + "Source": "https://forge.softwareheritage.org/source/swh-deposit", }, ) diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py index 79560007..014955b3 100644 --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,901 +1,963 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib from typing import Any, Tuple from abc import ABCMeta, abstractmethod from django.urls import reverse from django.http import HttpResponse from django.shortcuts import render from django.utils import timezone from rest_framework import status from rest_framework.authentication import BasicAuthentication from rest_framework.permissions import IsAuthenticated from rest_framework.views import APIView from swh.model import hashutil from swh.scheduler.utils import create_oneshot_task_dict from ..config import ( - SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI, - ARCHIVE_KEY, METADATA_KEY, RAW_METADATA_KEY, STATE_IRI, - DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, + SWHDefaultConfig, + EDIT_SE_IRI, + EM_IRI, + CONT_FILE_IRI, + ARCHIVE_KEY, + METADATA_KEY, + RAW_METADATA_KEY, + STATE_IRI, + DEPOSIT_STATUS_DEPOSITED, + DEPOSIT_STATUS_PARTIAL, PRIVATE_CHECK_DEPOSIT, - DEPOSIT_STATUS_LOAD_SUCCESS, ARCHIVE_TYPE, METADATA_TYPE + DEPOSIT_STATUS_LOAD_SUCCESS, + ARCHIVE_TYPE, + METADATA_TYPE, ) from ..errors import ( - MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT, - CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED, - make_error_response_from_dict, FORBIDDEN, - NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED, - ParserError, PARSING_ERROR -) -from ..models import ( - Deposit, DepositRequest, DepositCollection, - DepositClient + MAX_UPLOAD_SIZE_EXCEEDED, + BAD_REQUEST, + ERROR_CONTENT, + CHECKSUM_MISMATCH, + make_error_dict, + MEDIATION_NOT_ALLOWED, + make_error_response_from_dict, + FORBIDDEN, + NOT_FOUND, + make_error_response, + METHOD_NOT_ALLOWED, + ParserError, + PARSING_ERROR, ) +from ..models import Deposit, DepositRequest, DepositCollection, DepositClient from ..parsers import parse_xml -ACCEPT_PACKAGINGS = ['http://purl.org/net/sword/package/SimpleZip'] -ACCEPT_ARCHIVE_CONTENT_TYPES = ['application/zip', 'application/x-tar'] +ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"] +ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"] class SWHAPIView(APIView): """Mixin intended as a based API view to enforce the basic authentication check """ - authentication_classes = (BasicAuthentication, ) # type: Tuple[Any, ...] - permission_classes = (IsAuthenticated, ) + + authentication_classes = (BasicAuthentication,) # type: Tuple[Any, ...] + permission_classes = (IsAuthenticated,) class SWHBaseDeposit(SWHDefaultConfig, SWHAPIView, metaclass=ABCMeta): """Base deposit request class sharing multiple common behaviors. """ def _read_headers(self, req): """Read and unify the necessary headers from the request (those are not stored in the same location or not properly formatted). Args: req (Request): Input request Returns: Dictionary with the following keys (some associated values may be None): - content-type - content-length - in-progress - content-disposition - packaging - slug - on-behalf-of """ meta = req._request.META content_type = req.content_type - content_length = meta.get('CONTENT_LENGTH') + content_length = meta.get("CONTENT_LENGTH") if content_length and isinstance(content_length, str): content_length = int(content_length) # final deposit if not provided - in_progress = meta.get('HTTP_IN_PROGRESS', False) - content_disposition = meta.get('HTTP_CONTENT_DISPOSITION') + in_progress = meta.get("HTTP_IN_PROGRESS", False) + content_disposition = meta.get("HTTP_CONTENT_DISPOSITION") if isinstance(in_progress, str): - in_progress = in_progress.lower() == 'true' + in_progress = in_progress.lower() == "true" - content_md5sum = meta.get('HTTP_CONTENT_MD5') + content_md5sum = meta.get("HTTP_CONTENT_MD5") if content_md5sum: content_md5sum = bytes.fromhex(content_md5sum) - packaging = meta.get('HTTP_PACKAGING') - slug = meta.get('HTTP_SLUG') - on_behalf_of = meta.get('HTTP_ON_BEHALF_OF') - metadata_relevant = meta.get('HTTP_METADATA_RELEVANT') + packaging = meta.get("HTTP_PACKAGING") + slug = meta.get("HTTP_SLUG") + on_behalf_of = meta.get("HTTP_ON_BEHALF_OF") + metadata_relevant = meta.get("HTTP_METADATA_RELEVANT") return { - 'content-type': content_type, - 'content-length': content_length, - 'in-progress': in_progress, - 'content-disposition': content_disposition, - 'content-md5sum': content_md5sum, - 'packaging': packaging, - 'slug': slug, - 'on-behalf-of': on_behalf_of, - 'metadata-relevant': metadata_relevant, + "content-type": content_type, + "content-length": content_length, + "in-progress": in_progress, + "content-disposition": content_disposition, + "content-md5sum": content_md5sum, + "packaging": packaging, + "slug": slug, + "on-behalf-of": on_behalf_of, + "metadata-relevant": metadata_relevant, } def _compute_md5(self, filehandler): """Compute uploaded file's md5 sum. Args: filehandler (InMemoryUploadedFile): the file to compute the md5 hash Returns: the md5 checksum (str) """ h = hashlib.md5() for chunk in filehandler: h.update(chunk) return h.digest() - def _deposit_put(self, req, deposit_id=None, in_progress=False, - external_id=None): + def _deposit_put(self, req, deposit_id=None, in_progress=False, external_id=None): """Save/Update a deposit in db. Args: deposit_id (int): deposit identifier in_progress (dict): The deposit's status external_id (str): The external identifier to associate to the deposit Returns: The Deposit instance saved or updated. """ if in_progress is False: complete_date = timezone.now() status_type = DEPOSIT_STATUS_DEPOSITED else: complete_date = None status_type = DEPOSIT_STATUS_PARTIAL if not deposit_id: try: # find a deposit parent (same external id, status load # to success) - deposit_parent = Deposit.objects.filter( - external_id=external_id, - status=DEPOSIT_STATUS_LOAD_SUCCESS).order_by('-id')[0:1].get() # noqa + deposit_parent = ( + Deposit.objects.filter( + external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS + ) + .order_by("-id")[0:1] + .get() + ) # noqa except Deposit.DoesNotExist: deposit_parent = None - deposit = Deposit(collection=self._collection, - external_id=external_id, - complete_date=complete_date, - status=status_type, - client=self._client, - parent=deposit_parent) + deposit = Deposit( + collection=self._collection, + external_id=external_id, + complete_date=complete_date, + status=status_type, + client=self._client, + parent=deposit_parent, + ) else: deposit = Deposit.objects.get(pk=deposit_id) # update metadata deposit.complete_date = complete_date deposit.status = status_type - if self.config['checks']: + if self.config["checks"]: deposit.save() # needed to have a deposit id args = [deposit.collection.name, deposit.id] scheduler = self.scheduler - if (deposit.status == DEPOSIT_STATUS_DEPOSITED and - not deposit.check_task_id): + if deposit.status == DEPOSIT_STATUS_DEPOSITED and not deposit.check_task_id: check_url = req.build_absolute_uri( - reverse(PRIVATE_CHECK_DEPOSIT, args=args)) + reverse(PRIVATE_CHECK_DEPOSIT, args=args) + ) task = create_oneshot_task_dict( - 'check-deposit', deposit_check_url=check_url) - check_task_id = scheduler.create_tasks([task])[0]['id'] + "check-deposit", deposit_check_url=check_url + ) + check_task_id = scheduler.create_tasks([task])[0]["id"] deposit.check_task_id = check_task_id deposit.save() return deposit - def _deposit_request_put(self, deposit, deposit_request_data, - replace_metadata=False, replace_archives=False): + def _deposit_request_put( + self, + deposit, + deposit_request_data, + replace_metadata=False, + replace_archives=False, + ): """Save a deposit request with metadata attached to a deposit. Args: deposit (Deposit): The deposit concerned by the request deposit_request_data (dict): The dictionary with at most 2 deposit request types (archive, metadata) to associate to the deposit replace_metadata (bool): Flag defining if we add or update existing metadata to the deposit replace_archives (bool): Flag defining if we add or update archives to existing deposit Returns: None """ if replace_metadata: - DepositRequest.objects.filter( - deposit=deposit, - type=METADATA_TYPE).delete() + DepositRequest.objects.filter(deposit=deposit, type=METADATA_TYPE).delete() if replace_archives: - DepositRequest.objects.filter( - deposit=deposit, - type=ARCHIVE_TYPE).delete() + DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete() deposit_request = None archive_file = deposit_request_data.get(ARCHIVE_KEY) if archive_file: deposit_request = DepositRequest( - type=ARCHIVE_TYPE, - deposit=deposit, - archive=archive_file) + type=ARCHIVE_TYPE, deposit=deposit, archive=archive_file + ) deposit_request.save() metadata = deposit_request_data.get(METADATA_KEY) if metadata: raw_metadata = deposit_request_data.get(RAW_METADATA_KEY) deposit_request = DepositRequest( type=METADATA_TYPE, deposit=deposit, metadata=metadata, - raw_metadata=raw_metadata.decode('utf-8')) + raw_metadata=raw_metadata.decode("utf-8"), + ) deposit_request.save() assert deposit_request is not None def _delete_archives(self, collection_name, deposit_id): """Delete archives reference from the deposit id. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( - NOT_FOUND, - 'The deposit %s does not exist' % deposit_id) - DepositRequest.objects.filter( - deposit=deposit, - type=ARCHIVE_TYPE).delete() + NOT_FOUND, "The deposit %s does not exist" % deposit_id + ) + DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete() return {} def _delete_deposit(self, collection_name, deposit_id): """Delete deposit reference. Args: collection_name (str): Client's name deposit_id (id): The deposit to delete Returns Empty dict when ok. Dict with error key to describe the failure. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( - NOT_FOUND, - 'The deposit %s does not exist' % deposit_id) + NOT_FOUND, "The deposit %s does not exist" % deposit_id + ) if deposit.collection.name != collection_name: - summary = 'Cannot delete a deposit from another collection' + summary = "Cannot delete a deposit from another collection" description = "Deposit %s does not belong to the collection %s" % ( - deposit_id, collection_name) + deposit_id, + collection_name, + ) return make_error_dict( - BAD_REQUEST, - summary=summary, - verbose_description=description) + BAD_REQUEST, summary=summary, verbose_description=description + ) DepositRequest.objects.filter(deposit=deposit).delete() deposit.delete() return {} - def _check_preconditions_on(self, filehandler, md5sum, - content_length=None): + def _check_preconditions_on(self, filehandler, md5sum, content_length=None): """Check preconditions on provided file are respected. That is the length and/or the md5sum hash match the file's content. Args: filehandler (InMemoryUploadedFile): The file to check md5sum (hex str): md5 hash expected from the file's content content_length (int): the expected length if provided. Returns: Either none if no error or a dictionary with a key error detailing the problem. """ if content_length: - if content_length > self.config['max_upload_size']: + if content_length > self.config["max_upload_size"]: return make_error_dict( MAX_UPLOAD_SIZE_EXCEEDED, - 'Upload size limit exceeded (max %s bytes).' % - self.config['max_upload_size'], - 'Please consider sending the archive in ' - 'multiple steps.') + "Upload size limit exceeded (max %s bytes)." + % self.config["max_upload_size"], + "Please consider sending the archive in " "multiple steps.", + ) length = filehandler.size if length != content_length: - return make_error_dict(status.HTTP_412_PRECONDITION_FAILED, - 'Wrong length') + return make_error_dict( + status.HTTP_412_PRECONDITION_FAILED, "Wrong length" + ) if md5sum: _md5sum = self._compute_md5(filehandler) if _md5sum != md5sum: return make_error_dict( CHECKSUM_MISMATCH, - 'Wrong md5 hash', - 'The checksum sent %s and the actual checksum ' - '%s does not match.' % (hashutil.hash_to_hex(md5sum), - hashutil.hash_to_hex(_md5sum))) + "Wrong md5 hash", + "The checksum sent %s and the actual checksum " + "%s does not match." + % (hashutil.hash_to_hex(md5sum), hashutil.hash_to_hex(_md5sum)), + ) return None - def _binary_upload(self, req, headers, collection_name, deposit_id=None, - replace_metadata=False, replace_archives=False): + def _binary_upload( + self, + req, + headers, + collection_name, + deposit_id=None, + replace_metadata=False, + replace_archives=False, + ): """Binary upload routine. Other than such a request, a 415 response is returned. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 413 (request entity too large) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided """ - content_length = headers['content-length'] + content_length = headers["content-length"] if not content_length: return make_error_dict( BAD_REQUEST, - 'CONTENT_LENGTH header is mandatory', - 'For archive deposit, the ' - 'CONTENT_LENGTH header must be sent.') + "CONTENT_LENGTH header is mandatory", + "For archive deposit, the " "CONTENT_LENGTH header must be sent.", + ) - content_disposition = headers['content-disposition'] + content_disposition = headers["content-disposition"] if not content_disposition: return make_error_dict( BAD_REQUEST, - 'CONTENT_DISPOSITION header is mandatory', - 'For archive deposit, the ' - 'CONTENT_DISPOSITION header must be sent.') + "CONTENT_DISPOSITION header is mandatory", + "For archive deposit, the " "CONTENT_DISPOSITION header must be sent.", + ) - packaging = headers['packaging'] + packaging = headers["packaging"] if packaging and packaging not in ACCEPT_PACKAGINGS: return make_error_dict( BAD_REQUEST, - 'Only packaging %s is supported' % - ACCEPT_PACKAGINGS, - 'The packaging provided %s is not supported' % packaging) + "Only packaging %s is supported" % ACCEPT_PACKAGINGS, + "The packaging provided %s is not supported" % packaging, + ) - filehandler = req.FILES['file'] + filehandler = req.FILES["file"] precondition_status_response = self._check_preconditions_on( - filehandler, headers['content-md5sum'], content_length) + filehandler, headers["content-md5sum"], content_length + ) if precondition_status_response: return precondition_status_response - external_id = headers['slug'] + external_id = headers["slug"] # actual storage of data archive_metadata = filehandler - deposit = self._deposit_put(req, deposit_id=deposit_id, - in_progress=headers['in-progress'], - external_id=external_id) + deposit = self._deposit_put( + req, + deposit_id=deposit_id, + in_progress=headers["in-progress"], + external_id=external_id, + ) self._deposit_request_put( - deposit, {ARCHIVE_KEY: archive_metadata}, + deposit, + {ARCHIVE_KEY: archive_metadata}, replace_metadata=replace_metadata, - replace_archives=replace_archives) + replace_archives=replace_archives, + ) return { - 'deposit_id': deposit.id, - 'deposit_date': deposit.reception_date, - 'status': deposit.status, - 'archive': filehandler.name, + "deposit_id": deposit.id, + "deposit_date": deposit.reception_date, + "status": deposit.status, + "archive": filehandler.name, } def _read_metadata(self, metadata_stream): """Given a metadata stream, reads the metadata and returns both the parsed and the raw metadata. """ raw_metadata = metadata_stream.read() metadata = parse_xml(raw_metadata) return raw_metadata, metadata - def _multipart_upload(self, req, headers, collection_name, - deposit_id=None, replace_metadata=False, - replace_archives=False): + def _multipart_upload( + self, + req, + headers, + collection_name, + deposit_id=None, + replace_metadata=False, + replace_archives=False, + ): """Multipart upload supported with exactly: - 1 archive (zip) - 1 atom entry Other than such a request, a 415 response is returned. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 413 (request entity too large) if the length of the archive exceeds the max size configured - 415 (unsupported media type) if a wrong media type is provided """ - external_id = headers['slug'] + external_id = headers["slug"] content_types_present = set() data = { - 'application/zip': None, # expected either zip - 'application/x-tar': None, # or x-tar - 'application/atom+xml': None, + "application/zip": None, # expected either zip + "application/x-tar": None, # or x-tar + "application/atom+xml": None, } for key, value in req.FILES.items(): fh = value if fh.content_type in content_types_present: return make_error_dict( ERROR_CONTENT, - 'Only 1 application/zip (or application/x-tar) archive ' - 'and 1 atom+xml entry is supported (as per sword2.0 ' - 'specification)', - 'You provided more than 1 application/(zip|x-tar) ' - 'or more than 1 application/atom+xml content-disposition ' - 'header in the multipart deposit') + "Only 1 application/zip (or application/x-tar) archive " + "and 1 atom+xml entry is supported (as per sword2.0 " + "specification)", + "You provided more than 1 application/(zip|x-tar) " + "or more than 1 application/atom+xml content-disposition " + "header in the multipart deposit", + ) content_types_present.add(fh.content_type) data[fh.content_type] = fh if len(content_types_present) != 2: return make_error_dict( ERROR_CONTENT, - 'You must provide both 1 application/zip (or ' - 'application/x-tar) and 1 atom+xml entry for multipart ' - 'deposit', - 'You need to provide only 1 application/(zip|x-tar) ' - 'and 1 application/atom+xml content-disposition header ' - 'in the multipart deposit') - - filehandler = data['application/zip'] + "You must provide both 1 application/zip (or " + "application/x-tar) and 1 atom+xml entry for multipart " + "deposit", + "You need to provide only 1 application/(zip|x-tar) " + "and 1 application/atom+xml content-disposition header " + "in the multipart deposit", + ) + + filehandler = data["application/zip"] if not filehandler: - filehandler = data['application/x-tar'] + filehandler = data["application/x-tar"] precondition_status_response = self._check_preconditions_on( - filehandler, - headers['content-md5sum']) + filehandler, headers["content-md5sum"] + ) if precondition_status_response: return precondition_status_response try: - raw_metadata, metadata = self._read_metadata( - data['application/atom+xml']) + raw_metadata, metadata = self._read_metadata(data["application/atom+xml"]) except ParserError: return make_error_dict( PARSING_ERROR, - 'Malformed xml metadata', + "Malformed xml metadata", "The xml received is malformed. " - "Please ensure your metadata file is correctly formatted.") + "Please ensure your metadata file is correctly formatted.", + ) # actual storage of data - deposit = self._deposit_put(req, deposit_id=deposit_id, - in_progress=headers['in-progress'], - external_id=external_id) + deposit = self._deposit_put( + req, + deposit_id=deposit_id, + in_progress=headers["in-progress"], + external_id=external_id, + ) deposit_request_data = { ARCHIVE_KEY: filehandler, METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } self._deposit_request_put( - deposit, deposit_request_data, replace_metadata, replace_archives) + deposit, deposit_request_data, replace_metadata, replace_archives + ) return { - 'deposit_id': deposit.id, - 'deposit_date': deposit.reception_date, - 'archive': filehandler.name, - 'status': deposit.status, + "deposit_id": deposit.id, + "deposit_date": deposit.reception_date, + "archive": filehandler.name, + "status": deposit.status, } - def _atom_entry(self, req, headers, collection_name, - deposit_id=None, - replace_metadata=False, - replace_archives=False): + def _atom_entry( + self, + req, + headers, + collection_name, + deposit_id=None, + replace_metadata=False, + replace_archives=False, + ): """Atom entry deposit. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id: deposit id associated to the deposit - deposit_date: date of the deposit - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ try: raw_metadata, metadata = self._read_metadata(req.data) except ParserError: return make_error_dict( BAD_REQUEST, - 'Malformed xml metadata', + "Malformed xml metadata", "The xml received is malformed. " - "Please ensure your metadata file is correctly formatted.") + "Please ensure your metadata file is correctly formatted.", + ) if not metadata: return make_error_dict( BAD_REQUEST, - 'Empty body request is not supported', - 'Atom entry deposit is supposed to send for metadata. ' - 'If the body is empty, there is no metadata.') + "Empty body request is not supported", + "Atom entry deposit is supposed to send for metadata. " + "If the body is empty, there is no metadata.", + ) - external_id = metadata.get('external_identifier', headers['slug']) + external_id = metadata.get("external_identifier", headers["slug"]) - deposit = self._deposit_put(req, deposit_id=deposit_id, - in_progress=headers['in-progress'], - external_id=external_id) + deposit = self._deposit_put( + req, + deposit_id=deposit_id, + in_progress=headers["in-progress"], + external_id=external_id, + ) self._deposit_request_put( - deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, - replace_metadata, replace_archives) + deposit, + {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, + replace_metadata, + replace_archives, + ) return { - 'deposit_id': deposit.id, - 'deposit_date': deposit.reception_date, - 'archive': None, - 'status': deposit.status, + "deposit_id": deposit.id, + "deposit_date": deposit.reception_date, + "archive": None, + "status": deposit.status, } def _empty_post(self, req, headers, collection_name, deposit_id): """Empty post to finalize an empty deposit. Args: req (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier Returns: Dictionary of result with the deposit's id, the date it was completed and no archive. """ deposit = Deposit.objects.get(pk=deposit_id) deposit.complete_date = timezone.now() deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return { - 'deposit_id': deposit_id, - 'deposit_date': deposit.complete_date, - 'status': deposit.status, - 'archive': None, + "deposit_id": deposit_id, + "deposit_date": deposit.complete_date, + "status": deposit.status, + "archive": None, } def _make_iris(self, req, collection_name, deposit_id): """Define the IRI endpoints Args: req (Request): The initial request collection_name (str): client/collection's name deposit_id (id): Deposit identifier Returns: Dictionary of keys with the iris' urls. """ args = [collection_name, deposit_id] return { iri: req.build_absolute_uri(reverse(iri, args=args)) for iri in [EM_IRI, EDIT_SE_IRI, CONT_FILE_IRI, STATE_IRI] } - def additional_checks(self, req, headers, collection_name, - deposit_id=None): + def additional_checks(self, req, headers, collection_name, deposit_id=None): """Permit the child class to enrich additional checks. Returns: dict with 'error' detailing the problem. """ return {} def checks(self, req, collection_name, deposit_id=None): try: - self._collection = DepositCollection.objects.get( - name=collection_name) + self._collection = DepositCollection.objects.get(name=collection_name) except DepositCollection.DoesNotExist: return make_error_dict( - NOT_FOUND, - 'Unknown collection name %s' % collection_name) + NOT_FOUND, "Unknown collection name %s" % collection_name + ) username = req.user.username if username: # unauthenticated request can have the username empty try: self._client = DepositClient.objects.get(username=username) except DepositClient.DoesNotExist: - return make_error_dict(NOT_FOUND, - 'Unknown client name %s' % username) + return make_error_dict(NOT_FOUND, "Unknown client name %s" % username) if self._collection.id not in self._client.collections: return make_error_dict( FORBIDDEN, - 'Client %s cannot access collection %s' % ( - username, collection_name)) + "Client %s cannot access collection %s" + % (username, collection_name), + ) if deposit_id: try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( - NOT_FOUND, - 'Deposit with id %s does not exist' % - deposit_id) + NOT_FOUND, "Deposit with id %s does not exist" % deposit_id + ) checks = self.restrict_access(req, deposit) if checks: return checks headers = self._read_headers(req) - if headers['on-behalf-of']: - return make_error_dict(MEDIATION_NOT_ALLOWED, - 'Mediation is not supported.') + if headers["on-behalf-of"]: + return make_error_dict(MEDIATION_NOT_ALLOWED, "Mediation is not supported.") - checks = self.additional_checks(req, headers, - collection_name, deposit_id) - if 'error' in checks: + checks = self.additional_checks(req, headers, collection_name, deposit_id) + if "error" in checks: return checks - return {'headers': headers} + return {"headers": headers} def restrict_access(self, req, deposit=None): if deposit: - if (req.method != 'GET' and - deposit.status != DEPOSIT_STATUS_PARTIAL): + if req.method != "GET" and deposit.status != DEPOSIT_STATUS_PARTIAL: summary = "You can only act on deposit with status '%s'" % ( - DEPOSIT_STATUS_PARTIAL, ) + DEPOSIT_STATUS_PARTIAL, + ) description = "This deposit has status '%s'" % deposit.status return make_error_dict( - BAD_REQUEST, summary=summary, - verbose_description=description) + BAD_REQUEST, summary=summary, verbose_description=description + ) def _basic_not_allowed_method(self, req, method): return make_error_response( - req, METHOD_NOT_ALLOWED, - '%s method is not supported on this endpoint' % method) + req, + METHOD_NOT_ALLOWED, + "%s method is not supported on this endpoint" % method, + ) def get(self, req, *args, **kwargs): - return self._basic_not_allowed_method(req, 'GET') + return self._basic_not_allowed_method(req, "GET") def post(self, req, *args, **kwargs): - return self._basic_not_allowed_method(req, 'POST') + return self._basic_not_allowed_method(req, "POST") def put(self, req, *args, **kwargs): - return self._basic_not_allowed_method(req, 'PUT') + return self._basic_not_allowed_method(req, "PUT") def delete(self, req, *args, **kwargs): - return self._basic_not_allowed_method(req, 'DELETE') + return self._basic_not_allowed_method(req, "DELETE") class SWHGetDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support GET method. """ + def get(self, req, collection_name, deposit_id, format=None): """Endpoint to create/add resources to deposit. Returns: 200 response when no error during routine occurred 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) - if 'error' in checks: - return make_error_response_from_dict(req, checks['error']) + if "error" in checks: + return make_error_response_from_dict(req, checks["error"]) - r = self.process_get( - req, collection_name, deposit_id) + r = self.process_get(req, collection_name, deposit_id) if isinstance(r, tuple): status, content, content_type = r - return HttpResponse(content, - status=status, - content_type=content_type) + return HttpResponse(content, status=status, content_type=content_type) return r @abstractmethod def process_get(self, req, collection_name, deposit_id): """Routine to deal with the deposit's get processing. Returns: Tuple status, stream of content, content-type """ pass class SWHPostDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ + def post(self, req, collection_name, deposit_id=None, format=None): """Endpoint to create/add resources to deposit. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) - if 'error' in checks: - return make_error_response_from_dict(req, checks['error']) + if "error" in checks: + return make_error_response_from_dict(req, checks["error"]) - headers = checks['headers'] + headers = checks["headers"] _status, _iri_key, data = self.process_post( - req, headers, collection_name, deposit_id) + req, headers, collection_name, deposit_id + ) - error = data.get('error') + error = data.get("error") if error: return make_error_response_from_dict(req, error) - data['packagings'] = ACCEPT_PACKAGINGS - iris = self._make_iris(req, collection_name, data['deposit_id']) + data["packagings"] = ACCEPT_PACKAGINGS + iris = self._make_iris(req, collection_name, data["deposit_id"]) data.update(iris) - response = render(req, 'deposit/deposit_receipt.xml', - context=data, - content_type='application/xml', - status=_status) - response._headers['location'] = 'Location', data[_iri_key] + response = render( + req, + "deposit/deposit_receipt.xml", + context=data, + content_type="application/xml", + status=_status, + ) + response._headers["location"] = "Location", data[_iri_key] return response @abstractmethod def process_post(self, req, headers, collection_name, deposit_id=None): """Routine to deal with the deposit's processing. Returns Tuple of: - response status code (200, 201, etc...) - key iri (EM_IRI, EDIT_SE_IRI, etc...) - dictionary of the processing result """ pass class SWHPutDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support PUT method. """ + def put(self, req, collection_name, deposit_id, format=None): """Endpoint to update deposit resources. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) - if 'error' in checks: - return make_error_response_from_dict(req, checks['error']) + if "error" in checks: + return make_error_response_from_dict(req, checks["error"]) - headers = checks['headers'] + headers = checks["headers"] data = self.process_put(req, headers, collection_name, deposit_id) - error = data.get('error') + error = data.get("error") if error: return make_error_response_from_dict(req, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_put(self, req, headers, collection_name, deposit_id): """Routine to deal with updating a deposit in some way. Returns dictionary of the processing result """ pass class SWHDeleteDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ + def delete(self, req, collection_name, deposit_id): """Endpoint to delete some deposit's resources (archives, deposit). Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(req, collection_name, deposit_id) - if 'error' in checks: - return make_error_response_from_dict(req, checks['error']) + if "error" in checks: + return make_error_response_from_dict(req, checks["error"]) data = self.process_delete(req, collection_name, deposit_id) - error = data.get('error') + error = data.get("error") if error: return make_error_response_from_dict(req, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_delete(self, req, collection_name, deposit_id): """Routine to delete a resource. This is mostly not allowed except for the EM_IRI (cf. .api.deposit_update.SWHUpdateArchiveDeposit) """ pass diff --git a/swh/deposit/api/converters.py b/swh/deposit/api/converters.py index 6707f9ba..85943446 100644 --- a/swh/deposit/api/converters.py +++ b/swh/deposit/api/converters.py @@ -1,57 +1,57 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information def convert_status_detail(status_detail): """Given a status_detail dict, transforms it into a human readable string. Dict has the following form (all first level keys are optional):: { 'url': { 'summary': "summary-string", 'fields': [impacted-fields-list] }, 'metadata': [{ 'summary': "summary-string", 'fields': [impacted-fields-list], }], 'archive': [{ 'summary': "summary-string", 'fields': [impacted-fields-list], }] } Args: status_detail (dict): The status detail dict with the syntax mentioned Returns: the status detail as inlined string """ if not status_detail: return None def _str_fields(data): - fields = data.get('fields') + fields = data.get("fields") if not fields: - return '' - return ' (%s)' % ', '.join(map(str, fields)) + return "" + return " (%s)" % ", ".join(map(str, fields)) msg = [] - for key in ['metadata', 'archive']: + for key in ["metadata", "archive"]: _detail = status_detail.get(key) if _detail: for data in _detail: - msg.append('- %s%s\n' % (data['summary'], _str_fields(data))) + msg.append("- %s%s\n" % (data["summary"], _str_fields(data))) - _detail = status_detail.get('url') + _detail = status_detail.get("url") if _detail: - msg.append('- %s%s\n' % (_detail['summary'], _str_fields(_detail))) + msg.append("- %s%s\n" % (_detail["summary"], _str_fields(_detail))) if not msg: return None - return ''.join(msg) + return "".join(msg) diff --git a/swh/deposit/api/deposit.py b/swh/deposit/api/deposit.py index fb7ec49a..c5258081 100644 --- a/swh/deposit/api/deposit.py +++ b/swh/deposit/api/deposit.py @@ -1,93 +1,95 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework import status from .common import SWHPostDepositAPI, ACCEPT_ARCHIVE_CONTENT_TYPES from ..config import EDIT_SE_IRI from ..errors import make_error_dict, BAD_REQUEST from ..parsers import SWHFileUploadZipParser, SWHFileUploadTarParser from ..parsers import SWHAtomEntryParser from ..parsers import SWHMultiPartParser class SWHDeposit(SWHPostDepositAPI): """Deposit request class defining api endpoints for sword deposit. What's known as 'Col IRI' in the sword specification. HTTP verbs supported: POST """ - parser_classes = (SWHMultiPartParser, - SWHFileUploadZipParser, - SWHFileUploadTarParser, - SWHAtomEntryParser) - - def additional_checks(self, req, headers, collection_name, - deposit_id=None): - slug = headers['slug'] + + parser_classes = ( + SWHMultiPartParser, + SWHFileUploadZipParser, + SWHFileUploadTarParser, + SWHAtomEntryParser, + ) + + def additional_checks(self, req, headers, collection_name, deposit_id=None): + slug = headers["slug"] if not slug: - msg = 'Missing SLUG header in request' - verbose_description = 'Provide in the SLUG header one identifier, for example the url pointing to the resource you are depositing.' # noqa + msg = "Missing SLUG header in request" + verbose_description = "Provide in the SLUG header one identifier, for example the url pointing to the resource you are depositing." # noqa return make_error_dict(BAD_REQUEST, msg, verbose_description) return {} def process_post(self, req, headers, collection_name, deposit_id=None): """Create a first deposit as: - archive deposit (1 zip) - multipart (1 zip + 1 atom entry) - atom entry Args: req (Request): the request holding the information to parse and inject in db collection_name (str): the associated client Returns: An http response (HttpResponse) according to the situation. If everything is ok, a 201 response (created) with a deposit receipt. Otherwise, depending on the upload, the following errors can be returned: - archive deposit: - 400 (bad request) if the request is not providing an external identifier - 403 (forbidden) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or hash provided mismatch the reality of the archive. - 415 (unsupported media type) if a wrong media type is provided - multipart deposit: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided - Atom entry deposit: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ assert deposit_id is None if req.content_type in ACCEPT_ARCHIVE_CONTENT_TYPES: data = self._binary_upload(req, headers, collection_name) - elif req.content_type.startswith('multipart/'): + elif req.content_type.startswith("multipart/"): data = self._multipart_upload(req, headers, collection_name) else: data = self._atom_entry(req, headers, collection_name) return status.HTTP_201_CREATED, EDIT_SE_IRI, data diff --git a/swh/deposit/api/deposit_content.py b/swh/deposit/api/deposit_content.py index ecddd912..a5065ae6 100644 --- a/swh/deposit/api/deposit_content.py +++ b/swh/deposit/api/deposit_content.py @@ -1,42 +1,47 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render from rest_framework import status from .common import SWHBaseDeposit from ..errors import NOT_FOUND, make_error_response from ..errors import make_error_response_from_dict from ..models import DEPOSIT_STATUS_DETAIL, Deposit, DepositRequest class SWHDepositContent(SWHBaseDeposit): def get(self, req, collection_name, deposit_id, format=None): checks = self.checks(req, collection_name, deposit_id) - if 'error' in checks: - return make_error_response_from_dict(req, checks['error']) + if "error" in checks: + return make_error_response_from_dict(req, checks["error"]) try: deposit = Deposit.objects.get(pk=deposit_id) if deposit.collection.name != collection_name: raise Deposit.DoesNotExist except Deposit.DoesNotExist: return make_error_response( - req, NOT_FOUND, - 'deposit %s does not belong to collection %s' % ( - deposit_id, collection_name)) + req, + NOT_FOUND, + "deposit %s does not belong to collection %s" + % (deposit_id, collection_name), + ) requests = DepositRequest.objects.filter(deposit=deposit) context = { - 'deposit_id': deposit.id, - 'status': deposit.status, - 'status_detail': DEPOSIT_STATUS_DETAIL[deposit.status], - 'requests': requests, + "deposit_id": deposit.id, + "status": deposit.status, + "status_detail": DEPOSIT_STATUS_DETAIL[deposit.status], + "requests": requests, } - return render(req, 'deposit/content.xml', - context=context, - content_type='application/xml', - status=status.HTTP_200_OK) + return render( + req, + "deposit/content.xml", + context=context, + content_type="application/xml", + status=status.HTTP_200_OK, + ) diff --git a/swh/deposit/api/deposit_status.py b/swh/deposit/api/deposit_status.py index f800bd41..84687642 100644 --- a/swh/deposit/api/deposit_status.py +++ b/swh/deposit/api/deposit_status.py @@ -1,55 +1,67 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render from rest_framework import status from .common import SWHBaseDeposit from .converters import convert_status_detail from ..errors import NOT_FOUND, make_error_response from ..errors import make_error_response_from_dict from ..models import DEPOSIT_STATUS_DETAIL, Deposit class SWHDepositStatus(SWHBaseDeposit): """Deposit status. What's known as 'State IRI' in the sword specification. HTTP verbs supported: GET """ + def get(self, req, collection_name, deposit_id, format=None): checks = self.checks(req, collection_name, deposit_id) - if 'error' in checks: - return make_error_response_from_dict(req, checks['error']) + if "error" in checks: + return make_error_response_from_dict(req, checks["error"]) try: deposit = Deposit.objects.get(pk=deposit_id) if deposit.collection.name != collection_name: raise Deposit.DoesNotExist except Deposit.DoesNotExist: return make_error_response( - req, NOT_FOUND, - 'deposit %s does not belong to collection %s' % ( - deposit_id, collection_name)) + req, + NOT_FOUND, + "deposit %s does not belong to collection %s" + % (deposit_id, collection_name), + ) status_detail = convert_status_detail(deposit.status_detail) if not status_detail: status_detail = DEPOSIT_STATUS_DETAIL[deposit.status] context = { - 'deposit_id': deposit.id, - 'status_detail': status_detail, + "deposit_id": deposit.id, + "status_detail": status_detail, } - keys = ('status', 'swh_id', 'swh_id_context', 'swh_anchor_id', - 'swh_anchor_id_context', 'external_id') + keys = ( + "status", + "swh_id", + "swh_id_context", + "swh_anchor_id", + "swh_anchor_id_context", + "external_id", + ) for k in keys: context[k] = getattr(deposit, k, None) - return render(req, 'deposit/status.xml', - context=context, - content_type='application/xml', - status=status.HTTP_200_OK) + return render( + req, + "deposit/status.xml", + context=context, + content_type="application/xml", + status=status.HTTP_200_OK, + ) diff --git a/swh/deposit/api/deposit_update.py b/swh/deposit/api/deposit_update.py index 109902a7..21707415 100644 --- a/swh/deposit/api/deposit_update.py +++ b/swh/deposit/api/deposit_update.py @@ -1,148 +1,169 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework import status from .common import SWHPostDepositAPI, SWHPutDepositAPI, SWHDeleteDepositAPI from .common import ACCEPT_ARCHIVE_CONTENT_TYPES from ..config import CONT_FILE_IRI, EDIT_SE_IRI, EM_IRI from ..errors import make_error_dict, BAD_REQUEST from ..parsers import SWHFileUploadZipParser, SWHFileUploadTarParser from ..parsers import SWHAtomEntryParser from ..parsers import SWHMultiPartParser -class SWHUpdateArchiveDeposit(SWHPostDepositAPI, SWHPutDepositAPI, - SWHDeleteDepositAPI): +class SWHUpdateArchiveDeposit(SWHPostDepositAPI, SWHPutDepositAPI, SWHDeleteDepositAPI): """Deposit request class defining api endpoints for sword deposit. What's known as 'EM IRI' in the sword specification. HTTP verbs supported: PUT, POST, DELETE """ - parser_classes = (SWHFileUploadZipParser, SWHFileUploadTarParser, ) + + parser_classes = ( + SWHFileUploadZipParser, + SWHFileUploadTarParser, + ) def process_put(self, req, headers, collection_name, deposit_id): """Replace existing content for the existing deposit. source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_binary # noqa Returns: 204 No content """ if req.content_type not in ACCEPT_ARCHIVE_CONTENT_TYPES: - msg = 'Packaging format supported is restricted to %s' % ( - ', '.join(ACCEPT_ARCHIVE_CONTENT_TYPES)) + msg = "Packaging format supported is restricted to %s" % ( + ", ".join(ACCEPT_ARCHIVE_CONTENT_TYPES) + ) return make_error_dict(BAD_REQUEST, msg) - return self._binary_upload(req, headers, collection_name, - deposit_id=deposit_id, - replace_archives=True) + return self._binary_upload( + req, headers, collection_name, deposit_id=deposit_id, replace_archives=True + ) def process_post(self, req, headers, collection_name, deposit_id): """Add new content to the existing deposit. source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_mediaresource # noqa Returns: 201 Created Headers: Location: [Cont-File-IRI] Body: [optional Deposit Receipt] """ if req.content_type not in ACCEPT_ARCHIVE_CONTENT_TYPES: - msg = 'Packaging format supported is restricted to %s' % ( - ', '.join(ACCEPT_ARCHIVE_CONTENT_TYPES)) - return 'unused', 'unused', make_error_dict(BAD_REQUEST, msg) + msg = "Packaging format supported is restricted to %s" % ( + ", ".join(ACCEPT_ARCHIVE_CONTENT_TYPES) + ) + return "unused", "unused", make_error_dict(BAD_REQUEST, msg) - return (status.HTTP_201_CREATED, CONT_FILE_IRI, - self._binary_upload(req, headers, collection_name, deposit_id)) + return ( + status.HTTP_201_CREATED, + CONT_FILE_IRI, + self._binary_upload(req, headers, collection_name, deposit_id), + ) def process_delete(self, req, collection_name, deposit_id): """Delete content (archives) from existing deposit. source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_deletingcontent # noqa Returns: 204 Created """ return self._delete_archives(collection_name, deposit_id) -class SWHUpdateMetadataDeposit(SWHPostDepositAPI, SWHPutDepositAPI, - SWHDeleteDepositAPI): +class SWHUpdateMetadataDeposit( + SWHPostDepositAPI, SWHPutDepositAPI, SWHDeleteDepositAPI +): """Deposit request class defining api endpoints for sword deposit. What's known as 'Edit IRI' (and SE IRI) in the sword specification. HTTP verbs supported: POST (SE IRI), PUT (Edit IRI), DELETE """ + parser_classes = (SWHMultiPartParser, SWHAtomEntryParser) def process_put(self, req, headers, collection_name, deposit_id): """Replace existing deposit's metadata/archive with new ones. source: - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_metadata # noqa - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_multipart # noqa Returns: 204 No content """ - if req.content_type.startswith('multipart/'): - return self._multipart_upload(req, headers, collection_name, - deposit_id=deposit_id, - replace_archives=True, - replace_metadata=True) - return self._atom_entry(req, headers, collection_name, - deposit_id=deposit_id, replace_metadata=True) + if req.content_type.startswith("multipart/"): + return self._multipart_upload( + req, + headers, + collection_name, + deposit_id=deposit_id, + replace_archives=True, + replace_metadata=True, + ) + return self._atom_entry( + req, headers, collection_name, deposit_id=deposit_id, replace_metadata=True + ) def process_post(self, req, headers, collection_name, deposit_id): """Add new metadata/archive to existing deposit. source: - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_metadata # noqa - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_multipart # noqa This also deals with an empty post corner case to finalize a deposit. Returns: In optimal case for a multipart and atom-entry update, a 201 Created response. The body response will hold a deposit. And the response headers will contain an entry 'Location' with the EM-IRI. For the empty post case, this returns a 200. """ - if req.content_type.startswith('multipart/'): - return (status.HTTP_201_CREATED, EM_IRI, - self._multipart_upload(req, headers, collection_name, - deposit_id=deposit_id)) + if req.content_type.startswith("multipart/"): + return ( + status.HTTP_201_CREATED, + EM_IRI, + self._multipart_upload( + req, headers, collection_name, deposit_id=deposit_id + ), + ) # check for final empty post # source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html # #continueddeposit_complete - if headers['content-length'] == 0 and headers['in-progress'] is False: + if headers["content-length"] == 0 and headers["in-progress"] is False: data = self._empty_post(req, headers, collection_name, deposit_id) return (status.HTTP_200_OK, EDIT_SE_IRI, data) - return (status.HTTP_201_CREATED, EM_IRI, - self._atom_entry(req, headers, collection_name, - deposit_id=deposit_id)) + return ( + status.HTTP_201_CREATED, + EM_IRI, + self._atom_entry(req, headers, collection_name, deposit_id=deposit_id), + ) def process_delete(self, req, collection_name, deposit_id): """Delete the container (deposit). source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_deleteconteiner # noqa """ return self._delete_deposit(collection_name, deposit_id) diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py index f0f86945..4b1e1dd3 100644 --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -1,94 +1,97 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.deposit import utils from ...config import METADATA_TYPE, SWHDefaultConfig from ...models import DepositRequest, Deposit from rest_framework.permissions import AllowAny from swh.deposit.api.common import SWHAPIView from swh.deposit.errors import make_error_dict, NOT_FOUND class DepositReadMixin: """Deposit Read mixin """ def _deposit_requests(self, deposit, request_type): """Given a deposit, yields its associated deposit_request Args: deposit (Deposit): Deposit to list requests for request_type (str): 'archive' or 'metadata' Yields: deposit requests of type request_type associated to the deposit """ if isinstance(deposit, int): deposit = Deposit.objects.get(pk=deposit) deposit_requests = DepositRequest.objects.filter( - type=request_type, - deposit=deposit).order_by('id') + type=request_type, deposit=deposit + ).order_by("id") for deposit_request in deposit_requests: yield deposit_request def _metadata_get(self, deposit): """Given a deposit, aggregate all metadata requests. Args: deposit (Deposit): The deposit instance to extract metadata from. Returns: metadata dict from the deposit. """ - metadata = (m.metadata for m in self._deposit_requests( - deposit, request_type=METADATA_TYPE)) + metadata = ( + m.metadata + for m in self._deposit_requests(deposit, request_type=METADATA_TYPE) + ) return utils.merge(*metadata) class SWHPrivateAPIView(SWHDefaultConfig, SWHAPIView): """Mixin intended as private api (so no authentication) based API view (for the private ones). """ + authentication_classes = () - permission_classes = (AllowAny, ) + permission_classes = (AllowAny,) def checks(self, req, collection_name, deposit_id=None): """Override default checks implementation to allow empty collection. """ if deposit_id: try: Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( - NOT_FOUND, - 'Deposit with id %s does not exist' % - deposit_id) + NOT_FOUND, "Deposit with id %s does not exist" % deposit_id + ) headers = self._read_headers(req) - checks = self.additional_checks( - req, headers, collection_name, deposit_id) - if 'error' in checks: + checks = self.additional_checks(req, headers, collection_name, deposit_id) + if "error" in checks: return checks - return {'headers': headers} + return {"headers": headers} - def get(self, req, collection_name=None, deposit_id=None, format=None, - *args, **kwargs): + def get( + self, req, collection_name=None, deposit_id=None, format=None, *args, **kwargs + ): return super().get(req, collection_name, deposit_id, format) - def put(self, req, collection_name=None, deposit_id=None, format=None, - *args, **kwargs): + def put( + self, req, collection_name=None, deposit_id=None, format=None, *args, **kwargs + ): return super().put(req, collection_name, deposit_id, format) diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py index e17d4eab..76924560 100644 --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -1,229 +1,230 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import re import tarfile import zipfile from itertools import chain from shutil import get_unpack_formats from rest_framework import status from swh.scheduler.utils import create_oneshot_task_dict from . import DepositReadMixin, SWHPrivateAPIView from ..common import SWHGetDepositAPI from ...config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED from ...config import ARCHIVE_TYPE from ...models import Deposit -MANDATORY_FIELDS_MISSING = 'Mandatory fields are missing' -ALTERNATE_FIELDS_MISSING = 'Mandatory alternate fields are missing' -MANDATORY_ARCHIVE_UNREADABLE = 'At least one of its associated archives is not readable' # noqa -MANDATORY_ARCHIVE_INVALID = 'Mandatory archive is invalid (i.e contains only one archive)' # noqa -MANDATORY_ARCHIVE_UNSUPPORTED = 'Mandatory archive type is not supported' -MANDATORY_ARCHIVE_MISSING = 'Deposit without archive is rejected' +MANDATORY_FIELDS_MISSING = "Mandatory fields are missing" +ALTERNATE_FIELDS_MISSING = "Mandatory alternate fields are missing" +MANDATORY_ARCHIVE_UNREADABLE = ( + "At least one of its associated archives is not readable" # noqa +) +MANDATORY_ARCHIVE_INVALID = ( + "Mandatory archive is invalid (i.e contains only one archive)" # noqa +) +MANDATORY_ARCHIVE_UNSUPPORTED = "Mandatory archive type is not supported" +MANDATORY_ARCHIVE_MISSING = "Deposit without archive is rejected" ARCHIVE_EXTENSIONS = [ - 'zip', 'tar', 'tar.gz', 'xz', 'tar.xz', 'bz2', - 'tar.bz2', 'Z', 'tar.Z', 'tgz', '7z' + "zip", + "tar", + "tar.gz", + "xz", + "tar.xz", + "bz2", + "tar.bz2", + "Z", + "tar.Z", + "tgz", + "7z", ] -PATTERN_ARCHIVE_EXTENSION = re.compile( - r'.*\.(%s)$' % '|'.join(ARCHIVE_EXTENSIONS)) +PATTERN_ARCHIVE_EXTENSION = re.compile(r".*\.(%s)$" % "|".join(ARCHIVE_EXTENSIONS)) def known_archive_format(filename): - return any(filename.endswith(t) for t in - chain(*(x[1] for x in get_unpack_formats()))) + return any( + filename.endswith(t) for t in chain(*(x[1] for x in get_unpack_formats())) + ) class SWHChecksDeposit(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ + def _check_deposit_archives(self, deposit): """Given a deposit, check each deposit request of type archive. Args: The deposit to check archives for Returns tuple (status, error_detail): True, None if all archives are ok, (False, ) otherwise. """ - requests = list(self._deposit_requests( - deposit, request_type=ARCHIVE_TYPE)) + requests = list(self._deposit_requests(deposit, request_type=ARCHIVE_TYPE)) if len(requests) == 0: # no associated archive is refused - return False, { - 'archive': [{ - 'summary': MANDATORY_ARCHIVE_MISSING, - }] - } + return False, {"archive": [{"summary": MANDATORY_ARCHIVE_MISSING,}]} errors = [] for archive_request in requests: check, error_message = self._check_archive(archive_request) if not check: - errors.append({ - 'summary': error_message, - 'fields': [archive_request.id] - }) + errors.append( + {"summary": error_message, "fields": [archive_request.id]} + ) if not errors: return True, None - return False, { - 'archive': errors - } + return False, {"archive": errors} def _check_archive(self, archive_request): """Check that a deposit associated archive is ok: - readable - supported archive format - valid content: the archive does not contain a single archive file If any of those checks are not ok, return the corresponding failing check. Args: archive_path (DepositRequest): Archive to check Returns: (True, None) if archive is check compliant, (False, ) otherwise. """ archive_path = archive_request.archive.path if not known_archive_format(archive_path): return False, MANDATORY_ARCHIVE_UNSUPPORTED try: if zipfile.is_zipfile(archive_path): with zipfile.ZipFile(archive_path) as f: files = f.namelist() elif tarfile.is_tarfile(archive_path): with tarfile.open(archive_path) as f: files = f.getnames() else: return False, MANDATORY_ARCHIVE_UNSUPPORTED except Exception: return False, MANDATORY_ARCHIVE_UNREADABLE if len(files) > 1: return True, None element = files[0] if PATTERN_ARCHIVE_EXTENSION.match(element): # archive in archive! return False, MANDATORY_ARCHIVE_INVALID return True, None def _check_metadata(self, metadata): """Check to execute on all metadata for mandatory field presence. Args: metadata (dict): Metadata dictionary to check for mandatory fields Returns: tuple (status, error_detail): True, None if metadata are ok (False, ) otherwise. """ required_fields = { - 'author': False, + "author": False, } alternate_fields = { - ('name', 'title'): False, # alternate field, at least one + ("name", "title"): False, # alternate field, at least one # of them must be present } for field, value in metadata.items(): for name in required_fields: if name in field: required_fields[name] = True for possible_names in alternate_fields: for possible_name in possible_names: if possible_name in field: alternate_fields[possible_names] = True continue mandatory_result = [k for k, v in required_fields.items() if not v] - optional_result = [ - ' or '.join(k) for k, v in alternate_fields.items() if not v] + optional_result = [" or ".join(k) for k, v in alternate_fields.items() if not v] if mandatory_result == [] and optional_result == []: return True, None detail = [] if mandatory_result != []: - detail.append({ - 'summary': MANDATORY_FIELDS_MISSING, - 'fields': mandatory_result - }) + detail.append( + {"summary": MANDATORY_FIELDS_MISSING, "fields": mandatory_result} + ) if optional_result != []: - detail.append({ - 'summary': ALTERNATE_FIELDS_MISSING, - 'fields': optional_result, - }) - return False, { - 'metadata': detail - } + detail.append( + {"summary": ALTERNATE_FIELDS_MISSING, "fields": optional_result,} + ) + return False, {"metadata": detail} def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ deposit = Deposit.objects.get(pk=deposit_id) metadata = self._metadata_get(deposit) problems = {} # will check each deposit's associated request (both of type # archive and metadata) for errors archives_status, error_detail = self._check_deposit_archives(deposit) if not archives_status: problems.update(error_detail) metadata_status, error_detail = self._check_metadata(metadata) if not metadata_status: problems.update(error_detail) deposit_status = archives_status and metadata_status # if any problems arose, the deposit is rejected if not deposit_status: deposit.status = DEPOSIT_STATUS_REJECTED deposit.status_detail = problems response = { - 'status': deposit.status, - 'details': deposit.status_detail, + "status": deposit.status, + "details": deposit.status_detail, } else: deposit.status = DEPOSIT_STATUS_VERIFIED response = { - 'status': deposit.status, + "status": deposit.status, } - if not deposit.load_task_id and self.config['checks']: + if not deposit.load_task_id and self.config["checks"]: url = deposit.origin_url task = create_oneshot_task_dict( - 'load-deposit', url=url, deposit_id=deposit.id, - retries_left=3) - load_task_id = self.scheduler.create_tasks([task])[0]['id'] + "load-deposit", url=url, deposit_id=deposit.id, retries_left=3 + ) + load_task_id = self.scheduler.create_tasks([task])[0]["id"] deposit.load_task_id = load_task_id deposit.save() - return status.HTTP_200_OK, json.dumps(response), 'application/json' + return status.HTTP_200_OK, json.dumps(response), "application/json" diff --git a/swh/deposit/api/private/deposit_list.py b/swh/deposit/api/private/deposit_list.py index f3e3b1ad..4866acae 100644 --- a/swh/deposit/api/private/deposit_list.py +++ b/swh/deposit/api/private/deposit_list.py @@ -1,48 +1,50 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework.fields import _UnvalidatedField from rest_framework.generics import ListAPIView from rest_framework.pagination import PageNumberPagination from rest_framework import serializers from . import SWHPrivateAPIView from ..converters import convert_status_detail from ...models import Deposit class DefaultPagination(PageNumberPagination): page_size = 100 - page_size_query_param = 'page_size' + page_size_query_param = "page_size" class StatusDetailField(_UnvalidatedField): """status_detail field is a dict, we want a simple message instead. So, we reuse the convert_status_detail from deposit_status endpoint to that effect. """ + def to_representation(self, value): return convert_status_detail(value) class DepositSerializer(serializers.ModelSerializer): status_detail = StatusDetailField() class Meta: model = Deposit - fields = '__all__' + fields = "__all__" class DepositList(ListAPIView, SWHPrivateAPIView): """Deposit request class to list the deposit's status per page. HTTP verbs supported: GET """ - queryset = Deposit.objects.all().order_by('id') + + queryset = Deposit.objects.all().order_by("id") serializer_class = DepositSerializer pagination_class = DefaultPagination diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py index 2ee5da91..50cb6231 100644 --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -1,227 +1,231 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import os import shutil import tempfile from contextlib import contextmanager from django.http import FileResponse from rest_framework import status from swh.core import tarball from swh.model import identifiers from swh.deposit.utils import normalize_date from . import DepositReadMixin, SWHPrivateAPIView from ...config import SWH_PERSON, ARCHIVE_TYPE from ..common import SWHGetDepositAPI from ...models import Deposit @contextmanager def aggregate_tarballs(extraction_dir, archive_paths): """Aggregate multiple tarballs into one and returns this new archive's path. Args: extraction_dir (path): Path to use for the tarballs computation archive_paths ([str]): Deposit's archive paths Returns: Tuple (directory to clean up, archive path (aggregated or not)) """ # rebuild one zip archive from (possibly) multiple ones os.makedirs(extraction_dir, 0o755, exist_ok=True) - dir_path = tempfile.mkdtemp(prefix='swh.deposit-', dir=extraction_dir) + dir_path = tempfile.mkdtemp(prefix="swh.deposit-", dir=extraction_dir) # root folder to build an aggregated tarball - aggregated_tarball_rootdir = os.path.join(dir_path, 'aggregate') + aggregated_tarball_rootdir = os.path.join(dir_path, "aggregate") os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True) # uncompress in a temporary location all archives for archive_path in archive_paths: tarball.uncompress(archive_path, aggregated_tarball_rootdir) # Aggregate into one big tarball the multiple smaller ones temp_tarpath = shutil.make_archive( - aggregated_tarball_rootdir, 'zip', - aggregated_tarball_rootdir) + aggregated_tarball_rootdir, "zip", aggregated_tarball_rootdir + ) # can already clean up temporary directory shutil.rmtree(aggregated_tarball_rootdir) try: yield temp_tarpath finally: shutil.rmtree(dir_path) -class SWHDepositReadArchives(SWHPrivateAPIView, SWHGetDepositAPI, - DepositReadMixin): +class SWHDepositReadArchives(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ + ADDITIONAL_CONFIG = { - 'extraction_dir': ('str', '/tmp/swh-deposit/archive/'), + "extraction_dir": ("str", "/tmp/swh-deposit/archive/"), } def __init__(self): super().__init__() - self.extraction_dir = self.config['extraction_dir'] + self.extraction_dir = self.config["extraction_dir"] if not os.path.exists(self.extraction_dir): os.makedirs(self.extraction_dir) def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ - archive_paths = [r.archive.path for r in self._deposit_requests( - deposit_id, request_type=ARCHIVE_TYPE)] + archive_paths = [ + r.archive.path + for r in self._deposit_requests(deposit_id, request_type=ARCHIVE_TYPE) + ] with aggregate_tarballs(self.extraction_dir, archive_paths) as path: - return FileResponse(open(path, 'rb'), - status=status.HTTP_200_OK, - content_type='application/zip') + return FileResponse( + open(path, "rb"), + status=status.HTTP_200_OK, + content_type="application/zip", + ) -class SWHDepositReadMetadata(SWHPrivateAPIView, SWHGetDepositAPI, - DepositReadMixin): +class SWHDepositReadMetadata(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): """Class in charge of aggregating metadata on a deposit. """ + ADDITIONAL_CONFIG = { - 'provider': ('dict', { - # 'provider_name': '', # those are not set since read from the - # 'provider_url': '', # deposit's client - 'provider_type': 'deposit_client', - 'metadata': {} - }), - 'tool': ('dict', { - 'name': 'swh-deposit', - 'version': '0.0.1', - 'configuration': { - 'sword_version': '2' - } - }) + "provider": ( + "dict", + { + # 'provider_name': '', # those are not set since read from the + # 'provider_url': '', # deposit's client + "provider_type": "deposit_client", + "metadata": {}, + }, + ), + "tool": ( + "dict", + { + "name": "swh-deposit", + "version": "0.0.1", + "configuration": {"sword_version": "2"}, + }, + ), } def __init__(self): super().__init__() - self.provider = self.config['provider'] - self.tool = self.config['tool'] + self.provider = self.config["provider"] + self.tool = self.config["tool"] def _normalize_dates(self, deposit, metadata): """Normalize the date to use as a tuple of author date, committer date from the incoming metadata. Args: deposit (Deposit): Deposit model representation metadata (Dict): Metadata dict representation Returns: Tuple of author date, committer date. Those dates are swh normalized. """ - commit_date = metadata.get('codemeta:datePublished') - author_date = metadata.get('codemeta:dateCreated') + commit_date = metadata.get("codemeta:datePublished") + author_date = metadata.get("codemeta:dateCreated") if author_date and commit_date: pass elif commit_date: author_date = commit_date elif author_date: commit_date = author_date else: author_date = deposit.complete_date commit_date = deposit.complete_date - return ( - normalize_date(author_date), - normalize_date(commit_date) - ) + return (normalize_date(author_date), normalize_date(commit_date)) def metadata_read(self, deposit): """Read and aggregate multiple data on deposit into one unified data dictionary. Args: deposit (Deposit): Deposit concerned by the data aggregation. Returns: Dictionary of data representing the deposit to inject in swh. """ metadata = self._metadata_get(deposit) # Read information metadata - data = { - 'origin': { - 'type': 'deposit', - 'url': deposit.origin_url, - } - } + data = {"origin": {"type": "deposit", "url": deposit.origin_url,}} # revision fullname = deposit.client.username author_committer = SWH_PERSON # metadata provider - self.provider['provider_name'] = deposit.client.last_name - self.provider['provider_url'] = deposit.client.provider_url - - revision_type = 'tar' - revision_msg = '%s: Deposit %s in collection %s' % ( - fullname, deposit.id, deposit.collection.name) + self.provider["provider_name"] = deposit.client.last_name + self.provider["provider_url"] = deposit.client.provider_url + + revision_type = "tar" + revision_msg = "%s: Deposit %s in collection %s" % ( + fullname, + deposit.id, + deposit.collection.name, + ) author_date, commit_date = self._normalize_dates(deposit, metadata) - data['revision'] = { - 'synthetic': True, - 'date': author_date, - 'committer_date': commit_date, - 'author': author_committer, - 'committer': author_committer, - 'type': revision_type, - 'message': revision_msg, - 'metadata': metadata, + data["revision"] = { + "synthetic": True, + "date": author_date, + "committer_date": commit_date, + "author": author_committer, + "committer": author_committer, + "type": revision_type, + "message": revision_msg, + "metadata": metadata, } if deposit.parent: swh_persistent_id = deposit.parent.swh_id persistent_identifier = identifiers.parse_persistent_identifier( - swh_persistent_id) + swh_persistent_id + ) parent_revision = persistent_identifier.object_id - data['revision']['parents'] = [parent_revision] + data["revision"]["parents"] = [parent_revision] - data['branch_name'] = 'master' - data['origin_metadata'] = { - 'provider': self.provider, - 'tool': self.tool, - 'metadata': metadata + data["branch_name"] = "master" + data["origin_metadata"] = { + "provider": self.provider, + "tool": self.tool, + "metadata": metadata, } return data def process_get(self, req, collection_name, deposit_id): deposit = Deposit.objects.get(pk=deposit_id) data = self.metadata_read(deposit) d = {} if data: d = json.dumps(data) - return status.HTTP_200_OK, d, 'application/json' + return status.HTTP_200_OK, d, "application/json" diff --git a/swh/deposit/api/private/deposit_update_status.py b/swh/deposit/api/private/deposit_update_status.py index 208a49eb..8cb0b234 100644 --- a/swh/deposit/api/private/deposit_update_status.py +++ b/swh/deposit/api/private/deposit_update_status.py @@ -1,83 +1,82 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework.parsers import JSONParser -from swh.model.identifiers import ( - persistent_identifier, REVISION, DIRECTORY -) +from swh.model.identifiers import persistent_identifier, REVISION, DIRECTORY from . import SWHPrivateAPIView from ..common import SWHPutDepositAPI from ...errors import make_error_dict, BAD_REQUEST from ...models import Deposit, DEPOSIT_STATUS_DETAIL from ...models import DEPOSIT_STATUS_LOAD_SUCCESS class SWHUpdateStatusDeposit(SWHPrivateAPIView, SWHPutDepositAPI): """Deposit request class to update the deposit's status. HTTP verbs supported: PUT """ - parser_classes = (JSONParser, ) - def additional_checks(self, req, headers, collection_name, - deposit_id=None): + parser_classes = (JSONParser,) + + def additional_checks(self, req, headers, collection_name, deposit_id=None): """Enrich existing checks to the default ones. New checks: - Ensure the status is provided - Ensure it exists """ data = req.data - status = data.get('status') + status = data.get("status") if not status: - msg = 'The status key is mandatory with possible values %s' % list( - DEPOSIT_STATUS_DETAIL.keys()) + msg = "The status key is mandatory with possible values %s" % list( + DEPOSIT_STATUS_DETAIL.keys() + ) return make_error_dict(BAD_REQUEST, msg) if status not in DEPOSIT_STATUS_DETAIL: - msg = 'Possible status in %s' % list(DEPOSIT_STATUS_DETAIL.keys()) + msg = "Possible status in %s" % list(DEPOSIT_STATUS_DETAIL.keys()) return make_error_dict(BAD_REQUEST, msg) if status == DEPOSIT_STATUS_LOAD_SUCCESS: - swh_id = data.get('revision_id') + swh_id = data.get("revision_id") if not swh_id: - msg = 'Updating status to %s requires a revision_id key' % ( - status, ) + msg = "Updating status to %s requires a revision_id key" % (status,) return make_error_dict(BAD_REQUEST, msg) return {} def process_put(self, req, headers, collection_name, deposit_id): """Update the deposit's status Returns: 204 No content """ deposit = Deposit.objects.get(pk=deposit_id) - deposit.status = req.data['status'] # checks already done before + deposit.status = req.data["status"] # checks already done before - origin_url = req.data.get('origin_url') + origin_url = req.data.get("origin_url") - dir_id = req.data.get('directory_id') + dir_id = req.data.get("directory_id") if dir_id: deposit.swh_id = persistent_identifier(DIRECTORY, dir_id) deposit.swh_id_context = persistent_identifier( - DIRECTORY, dir_id, metadata={'origin': origin_url}) + DIRECTORY, dir_id, metadata={"origin": origin_url} + ) - rev_id = req.data.get('revision_id') + rev_id = req.data.get("revision_id") if rev_id: - deposit.swh_anchor_id = persistent_identifier( - REVISION, rev_id) + deposit.swh_anchor_id = persistent_identifier(REVISION, rev_id) deposit.swh_anchor_id_context = persistent_identifier( - REVISION, rev_id, metadata={'origin': origin_url}) + REVISION, rev_id, metadata={"origin": origin_url} + ) deposit.save() return {} diff --git a/swh/deposit/api/private/urls.py b/swh/deposit/api/private/urls.py index f3a0363c..19330bbf 100644 --- a/swh/deposit/api/private/urls.py +++ b/swh/deposit/api/private/urls.py @@ -1,62 +1,79 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.conf.urls import url from ...config import ( - PRIVATE_GET_RAW_CONTENT, PRIVATE_PUT_DEPOSIT, PRIVATE_GET_DEPOSIT_METADATA, - PRIVATE_CHECK_DEPOSIT, PRIVATE_LIST_DEPOSITS + PRIVATE_GET_RAW_CONTENT, + PRIVATE_PUT_DEPOSIT, + PRIVATE_GET_DEPOSIT_METADATA, + PRIVATE_CHECK_DEPOSIT, + PRIVATE_LIST_DEPOSITS, ) from .deposit_read import SWHDepositReadArchives from .deposit_read import SWHDepositReadMetadata from .deposit_update_status import SWHUpdateStatusDeposit from .deposit_check import SWHChecksDeposit from .deposit_list import DepositList urlpatterns = [ # Retrieve deposit's raw archives' content # -> GET - url(r'^(?P[^/]+)/(?P[^/]+)/raw/$', + url( + r"^(?P[^/]+)/(?P[^/]+)/raw/$", SWHDepositReadArchives.as_view(), - name=PRIVATE_GET_RAW_CONTENT), + name=PRIVATE_GET_RAW_CONTENT, + ), # Update deposit's status # -> PUT - url(r'^(?P[^/]+)/(?P[^/]+)/update/$', + url( + r"^(?P[^/]+)/(?P[^/]+)/update/$", SWHUpdateStatusDeposit.as_view(), - name=PRIVATE_PUT_DEPOSIT), + name=PRIVATE_PUT_DEPOSIT, + ), # Retrieve metadata information on a specific deposit # -> GET - url(r'^(?P[^/]+)/(?P[^/]+)/meta/$', + url( + r"^(?P[^/]+)/(?P[^/]+)/meta/$", SWHDepositReadMetadata.as_view(), - name=PRIVATE_GET_DEPOSIT_METADATA), + name=PRIVATE_GET_DEPOSIT_METADATA, + ), # Check archive and metadata information on a specific deposit # -> GET - url(r'^(?P[^/]+)/(?P[^/]+)/check/$', + url( + r"^(?P[^/]+)/(?P[^/]+)/check/$", SWHChecksDeposit.as_view(), - name=PRIVATE_CHECK_DEPOSIT), + name=PRIVATE_CHECK_DEPOSIT, + ), # Retrieve deposit's raw archives' content # -> GET - url(r'^(?P[^/]+)/raw/$', + url( + r"^(?P[^/]+)/raw/$", SWHDepositReadArchives.as_view(), - name=PRIVATE_GET_RAW_CONTENT+'-nc'), + name=PRIVATE_GET_RAW_CONTENT + "-nc", + ), # Update deposit's status # -> PUT - url(r'^(?P[^/]+)/update/$', + url( + r"^(?P[^/]+)/update/$", SWHUpdateStatusDeposit.as_view(), - name=PRIVATE_PUT_DEPOSIT+'-nc'), + name=PRIVATE_PUT_DEPOSIT + "-nc", + ), # Retrieve metadata information on a specific deposit # -> GET - url(r'^(?P[^/]+)/meta/$', + url( + r"^(?P[^/]+)/meta/$", SWHDepositReadMetadata.as_view(), - name=PRIVATE_GET_DEPOSIT_METADATA+'-nc'), + name=PRIVATE_GET_DEPOSIT_METADATA + "-nc", + ), # Check archive and metadata information on a specific deposit # -> GET - url(r'^(?P[^/]+)/check/$', + url( + r"^(?P[^/]+)/check/$", SWHChecksDeposit.as_view(), - name=PRIVATE_CHECK_DEPOSIT+'-nc'), - - url(r'^deposits/$', DepositList.as_view(), - name=PRIVATE_LIST_DEPOSITS) + name=PRIVATE_CHECK_DEPOSIT + "-nc", + ), + url(r"^deposits/$", DepositList.as_view(), name=PRIVATE_LIST_DEPOSITS), ] diff --git a/swh/deposit/api/service_document.py b/swh/deposit/api/service_document.py index 9b79065c..6aa3899c 100644 --- a/swh/deposit/api/service_document.py +++ b/swh/deposit/api/service_document.py @@ -1,33 +1,34 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render from django.urls import reverse from .common import SWHBaseDeposit, ACCEPT_PACKAGINGS from .common import ACCEPT_ARCHIVE_CONTENT_TYPES from ..config import COL_IRI from ..models import DepositClient, DepositCollection class SWHServiceDocument(SWHBaseDeposit): def get(self, req, *args, **kwargs): client = DepositClient.objects.get(username=req.user) collections = {} for col_id in client.collections: col = DepositCollection.objects.get(pk=col_id) col_uri = req.build_absolute_uri(reverse(COL_IRI, args=[col.name])) collections[col.name] = col_uri context = { - 'max_upload_size': self.config['max_upload_size'], - 'accept_packagings': ACCEPT_PACKAGINGS, - 'accept_content_types': ACCEPT_ARCHIVE_CONTENT_TYPES, - 'collections': collections, + "max_upload_size": self.config["max_upload_size"], + "accept_packagings": ACCEPT_PACKAGINGS, + "accept_content_types": ACCEPT_ARCHIVE_CONTENT_TYPES, + "collections": collections, } - return render(req, 'deposit/service_document.xml', - context, content_type='application/xml') + return render( + req, "deposit/service_document.xml", context, content_type="application/xml" + ) diff --git a/swh/deposit/api/urls.py b/swh/deposit/api/urls.py index b65f0be0..236db5aa 100644 --- a/swh/deposit/api/urls.py +++ b/swh/deposit/api/urls.py @@ -1,64 +1,70 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """SWH's deposit api URL Configuration """ from django.conf.urls import url from django.shortcuts import render from ..config import EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI from ..config import SD_IRI, COL_IRI, STATE_IRI from .deposit import SWHDeposit from .deposit_status import SWHDepositStatus from .deposit_update import SWHUpdateMetadataDeposit from .deposit_update import SWHUpdateArchiveDeposit from .deposit_content import SWHDepositContent from .service_document import SWHServiceDocument def api_view(req): return render(req, "api.html") # PUBLIC API urlpatterns = [ # simple view on the api - url(r'^$', api_view, name='api'), + url(r"^$", api_view, name="api"), # SD IRI - Service Document IRI # -> GET - url(r'^servicedocument/', SWHServiceDocument.as_view(), - name=SD_IRI), + url(r"^servicedocument/", SWHServiceDocument.as_view(), name=SD_IRI), # Col IRI - Collection IRI # -> POST - url(r'^(?P[^/]+)/$', SWHDeposit.as_view(), - name=COL_IRI), + url(r"^(?P[^/]+)/$", SWHDeposit.as_view(), name=COL_IRI), # EM IRI - Atom Edit Media IRI (update archive IRI) # -> PUT (update-in-place existing archive) # -> POST (add new archive) - url(r'^(?P[^/]+)/(?P[^/]+)/media/$', + url( + r"^(?P[^/]+)/(?P[^/]+)/media/$", SWHUpdateArchiveDeposit.as_view(), - name=EM_IRI), + name=EM_IRI, + ), # Edit IRI - Atom Entry Edit IRI (update metadata IRI) # SE IRI - Sword Edit IRI ;; possibly same as Edit IRI # -> PUT (update in place) # -> POST (add new metadata) - url(r'^(?P[^/]+)/(?P[^/]+)/metadata/$', + url( + r"^(?P[^/]+)/(?P[^/]+)/metadata/$", SWHUpdateMetadataDeposit.as_view(), - name=EDIT_SE_IRI), + name=EDIT_SE_IRI, + ), # State IRI # -> GET - url(r'^(?P[^/]+)/(?P[^/]+)/status/$', + url( + r"^(?P[^/]+)/(?P[^/]+)/status/$", SWHDepositStatus.as_view(), - name=STATE_IRI), + name=STATE_IRI, + ), # Cont/File IRI # -> GET - url(r'^(?P[^/]+)/(?P[^/]+)/content/$', + url( + r"^(?P[^/]+)/(?P[^/]+)/content/$", SWHDepositContent.as_view(), - name=CONT_FILE_IRI), # specification is not clear about - # FILE-IRI, we assume it's the same as - # the CONT-IRI one + name=CONT_FILE_IRI, + ), # specification is not clear about + # FILE-IRI, we assume it's the same as + # the CONT-IRI one ] diff --git a/swh/deposit/apps.py b/swh/deposit/apps.py index 16dffec6..2a60f2c6 100644 --- a/swh/deposit/apps.py +++ b/swh/deposit/apps.py @@ -1,10 +1,10 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.apps import AppConfig class DepositConfig(AppConfig): - name = 'swh.deposit' + name = "swh.deposit" diff --git a/swh/deposit/auth.py b/swh/deposit/auth.py index dbb5155e..6c3fb63e 100644 --- a/swh/deposit/auth.py +++ b/swh/deposit/auth.py @@ -1,64 +1,63 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework import status from .errors import UNAUTHORIZED, make_error_response def convert_response(request, content): """Convert response from drf's basic authentication mechanism to a swh-deposit one. Args: request (Request): Use to build the response content (bytes): The drf's answer Returns: Response with the same status error as before, only the body is now an swh-deposit compliant one. """ from json import loads - content = loads(content.decode('utf-8')) - detail = content.get('detail') + content = loads(content.decode("utf-8")) + detail = content.get("detail") if detail: - verbose_description = 'API is protected by basic authentication' + verbose_description = "API is protected by basic authentication" else: - detail = 'API is protected by basic authentication' + detail = "API is protected by basic authentication" verbose_description = None response = make_error_response( - request, - UNAUTHORIZED, - summary=detail, - verbose_description=verbose_description) - response['WWW-Authenticate'] = 'Basic realm=""' + request, UNAUTHORIZED, summary=detail, verbose_description=verbose_description + ) + response["WWW-Authenticate"] = 'Basic realm=""' return response class WrapBasicAuthenticationResponseMiddleware: """Middleware to capture potential authentication error and convert them to standard deposit response. This is to be installed in django's settings.py module. """ + def __init__(self, get_response): super().__init__() self.get_response = get_response def __call__(self, request): response = self.get_response(request) if response.status_code is status.HTTP_401_UNAUTHORIZED: - content_type = response._headers.get('content-type') - if content_type == ('Content-Type', 'application/json'): + content_type = response._headers.get("content-type") + if content_type == ("Content-Type", "application/json"): return convert_response(request, response.content) return response diff --git a/swh/deposit/cli/__init__.py b/swh/deposit/cli/__init__.py index fac8a56a..cc6e00dc 100644 --- a/swh/deposit/cli/__init__.py +++ b/swh/deposit/cli/__init__.py @@ -1,38 +1,40 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click import logging from swh.core.cli import CONTEXT_SETTINGS logger = logging.getLogger(__name__) @click.group(context_settings=CONTEXT_SETTINGS) @click.pass_context def deposit(ctx): """Deposit main command """ ctx.ensure_object(dict) - log_level = ctx.obj.get('log_level', logging.INFO) + log_level = ctx.obj.get("log_level", logging.INFO) logger.setLevel(log_level) def main(): logging.basicConfig() - return deposit(auto_envvar_prefix='SWH_DEPOSIT') + return deposit(auto_envvar_prefix="SWH_DEPOSIT") + # These import statements MUST be executed after defining the 'deposit' group # since the subcommands in these are defined using this 'deposit' group. from . import client # noqa + try: from . import admin # noqa except ImportError: # server part is optional - logger.debug('admin subcommand not loaded') + logger.debug("admin subcommand not loaded") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/swh/deposit/cli/admin.py b/swh/deposit/cli/admin.py index b59268cb..7192defd 100644 --- a/swh/deposit/cli/admin.py +++ b/swh/deposit/cli/admin.py @@ -1,254 +1,273 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click from swh.deposit.config import setup_django_for from swh.deposit.cli import deposit -@deposit.group('admin') -@click.option('--config-file', '-C', default=None, - type=click.Path(exists=True, dir_okay=False,), - help="Optional extra configuration file.") -@click.option('--platform', default='development', - type=click.Choice(['development', 'production']), - help='development or production platform') +@deposit.group("admin") +@click.option( + "--config-file", + "-C", + default=None, + type=click.Path(exists=True, dir_okay=False,), + help="Optional extra configuration file.", +) +@click.option( + "--platform", + default="development", + type=click.Choice(["development", "production"]), + help="development or production platform", +) @click.pass_context def admin(ctx, config_file, platform): """Server administration tasks (manipulate user or collections)""" # configuration happens here setup_django_for(platform, config_file=config_file) -@admin.group('user') +@admin.group("user") @click.pass_context def user(ctx): """Manipulate user.""" # configuration happens here pass def _create_collection(name): """Create the collection with name if it does not exist. Args: name (str): collection's name Returns: collection (DepositCollection): the existing collection object (created or not) """ # to avoid loading too early django namespaces from swh.deposit.models import DepositCollection try: collection = DepositCollection.objects.get(name=name) - click.echo('Collection %s exists, nothing to do.' % name) + click.echo("Collection %s exists, nothing to do." % name) except DepositCollection.DoesNotExist: - click.echo('Create new collection %s' % name) + click.echo("Create new collection %s" % name) collection = DepositCollection.objects.create(name=name) - click.echo('Collection %s created' % name) + click.echo("Collection %s created" % name) return collection -@user.command('create') -@click.option('--username', required=True, help="User's name") -@click.option('--password', required=True, - help="Desired user's password (plain).") -@click.option('--firstname', default='', help="User's first name") -@click.option('--lastname', default='', help="User's last name") -@click.option('--email', default='', help="User's email") -@click.option('--collection', help="User's collection") -@click.option('--provider-url', default='', help="Provider URL") -@click.option('--domain', default='', help="The domain") +@user.command("create") +@click.option("--username", required=True, help="User's name") +@click.option("--password", required=True, help="Desired user's password (plain).") +@click.option("--firstname", default="", help="User's first name") +@click.option("--lastname", default="", help="User's last name") +@click.option("--email", default="", help="User's email") +@click.option("--collection", help="User's collection") +@click.option("--provider-url", default="", help="Provider URL") +@click.option("--domain", default="", help="The domain") @click.pass_context -def user_create(ctx, username, password, firstname, lastname, email, - collection, provider_url, domain): +def user_create( + ctx, + username, + password, + firstname, + lastname, + email, + collection, + provider_url, + domain, +): """Create a user with some needed information (password, collection) If the collection does not exist, the collection is then created alongside. The password is stored encrypted using django's utilities. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositClient # If collection is not provided, fallback to username if not collection: collection = username - click.echo('collection: %s' % collection) + click.echo("collection: %s" % collection) # create the collection if it does not exist collection = _create_collection(collection) # user create/update try: user = DepositClient.objects.get(username=username) - click.echo('User %s exists, updating information.' % user) + click.echo("User %s exists, updating information." % user) user.set_password(password) except DepositClient.DoesNotExist: - click.echo('Create new user %s' % username) - user = DepositClient.objects.create_user( - username=username, - password=password) + click.echo("Create new user %s" % username) + user = DepositClient.objects.create_user(username=username, password=password) user.collections = [collection.id] user.first_name = firstname user.last_name = lastname user.email = email user.is_active = True user.provider_url = provider_url user.domain = domain user.save() - click.echo('Information registered for user %s' % user) + click.echo("Information registered for user %s" % user) -@user.command('list') +@user.command("list") @click.pass_context def user_list(ctx): """List existing users. This entrypoint is not paginated yet as there is not a lot of entry. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositClient + users = DepositClient.objects.all() if not users: - output = 'Empty user list' + output = "Empty user list" else: - output = '\n'.join((user.username for user in users)) + output = "\n".join((user.username for user in users)) click.echo(output) -@user.command('exists') -@click.argument('username', required=True) +@user.command("exists") +@click.argument("username", required=True) @click.pass_context def user_exists(ctx, username): """Check if user exists. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositClient + try: DepositClient.objects.get(username=username) - click.echo('User %s exists.' % username) + click.echo("User %s exists." % username) ctx.exit(0) except DepositClient.DoesNotExist: - click.echo('User %s does not exist.' % username) + click.echo("User %s does not exist." % username) ctx.exit(1) -@admin.group('collection') +@admin.group("collection") @click.pass_context def collection(ctx): """Manipulate collections.""" pass -@collection.command('create') -@click.option('--name', required=True, help="Collection's name") +@collection.command("create") +@click.option("--name", required=True, help="Collection's name") @click.pass_context def collection_create(ctx, name): _create_collection(name) -@collection.command('list') +@collection.command("list") @click.pass_context def collection_list(ctx): """List existing collections. This entrypoint is not paginated yet as there is not a lot of entry. """ # to avoid loading too early django namespaces from swh.deposit.models import DepositCollection + collections = DepositCollection.objects.all() if not collections: - output = 'Empty collection list' + output = "Empty collection list" else: - output = '\n'.join((col.name for col in collections)) + output = "\n".join((col.name for col in collections)) click.echo(output) -@admin.group('deposit') +@admin.group("deposit") @click.pass_context def adm_deposit(ctx): """Manipulate deposit.""" pass -@adm_deposit.command('reschedule') -@click.option('--deposit-id', required=True, help="Deposit identifier") +@adm_deposit.command("reschedule") +@click.option("--deposit-id", required=True, help="Deposit identifier") @click.pass_context def adm_deposit_reschedule(ctx, deposit_id): """Reschedule the deposit loading This will: - check the deposit's status to something reasonable (failed or done). That means that the checks have passed alright but something went wrong during the loading (failed: loading failed, done: loading ok, still for some reasons as in bugs, we need to reschedule it) - reset the deposit's status to 'verified' (prior to any loading but after the checks which are fine) and removes the different archives' identifiers (swh-id, ...) - trigger back the loading task through the scheduler """ # to avoid loading too early django namespaces from datetime import datetime from swh.deposit.models import Deposit from swh.deposit.config import ( - DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE, - DEPOSIT_STATUS_VERIFIED, SWHDefaultConfig, + DEPOSIT_STATUS_LOAD_SUCCESS, + DEPOSIT_STATUS_LOAD_FAILURE, + DEPOSIT_STATUS_VERIFIED, + SWHDefaultConfig, ) try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: - click.echo('Deposit %s does not exist.' % deposit_id) + click.echo("Deposit %s does not exist." % deposit_id) ctx.exit(1) # Check the deposit is in a reasonable state - accepted_statuses = [ - DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE - ] + accepted_statuses = [DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE] if deposit.status == DEPOSIT_STATUS_VERIFIED: - click.echo('Deposit %s\'s status already set for rescheduling.' % ( - deposit_id)) + click.echo("Deposit %s's status already set for rescheduling." % (deposit_id)) ctx.exit(0) if deposit.status not in accepted_statuses: - click.echo('Deposit %s\'s status be one of %s.' % ( - deposit_id, ', '.join(accepted_statuses))) + click.echo( + "Deposit %s's status be one of %s." + % (deposit_id, ", ".join(accepted_statuses)) + ) ctx.exit(1) task_id = deposit.load_task_id if not task_id: - click.echo('Deposit %s cannot be rescheduled. It misses the ' - 'associated task.' % deposit_id) + click.echo( + "Deposit %s cannot be rescheduled. It misses the " + "associated task." % deposit_id + ) ctx.exit(1) # Reset the deposit's state deposit.swh_id = None deposit.swh_id_context = None deposit.swh_anchor_id = None deposit.swh_anchor_id_context = None deposit.status = DEPOSIT_STATUS_VERIFIED deposit.save() # Trigger back the deposit scheduler = SWHDefaultConfig().scheduler scheduler.set_status_tasks( - [task_id], status='next_run_not_scheduled', - next_run=datetime.now()) + [task_id], status="next_run_not_scheduled", next_run=datetime.now() + ) diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py index fc945fa0..820fd5ad 100644 --- a/swh/deposit/cli/client.py +++ b/swh/deposit/cli/client.py @@ -1,397 +1,490 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import logging import sys import tempfile import uuid import json import yaml import click import xmltodict from swh.deposit.client import PublicApiDepositClient from swh.deposit.cli import deposit logger = logging.getLogger(__name__) class InputError(ValueError): """Input script error """ + pass def generate_slug(): """Generate a slug (sample purposes). """ return str(uuid.uuid4()) def _url(url): """Force the /1 api version at the end of the url (avoiding confusing issues without it). Args: url (str): api url used by cli users Returns: Top level api url to actually request """ - if not url.endswith('/1'): - url = '%s/1' % url + if not url.endswith("/1"): + url = "%s/1" % url return url def generate_metadata_file(name, external_id, authors, temp_dir): """Generate a temporary metadata file with the minimum required metadata This generates a xml file in a temporary location and returns the path to that file. This is up to the client of that function to clean up the temporary file. Args: name (str): Software's name external_id (str): External identifier (slug) or generated one authors (List[str]): List of author names Returns: Filepath to the metadata generated file """ - path = os.path.join(temp_dir, 'metadata.xml') + path = os.path.join(temp_dir, "metadata.xml") # generate a metadata file with the minimum required metadata codemetadata = { - 'entry': { - '@xmlns': "http://www.w3.org/2005/Atom", - '@xmlns:codemeta': "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0", - 'codemeta:name': name, - 'codemeta:identifier': external_id, - 'codemeta:author': [{ - 'codemeta:name': author_name - } for author_name in authors], + "entry": { + "@xmlns": "http://www.w3.org/2005/Atom", + "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0", + "codemeta:name": name, + "codemeta:identifier": external_id, + "codemeta:author": [ + {"codemeta:name": author_name} for author_name in authors + ], }, } - logging.debug('Temporary file: %s', path) - logging.debug('Metadata dict to generate as xml: %s', codemetadata) + logging.debug("Temporary file: %s", path) + logging.debug("Metadata dict to generate as xml: %s", codemetadata) s = xmltodict.unparse(codemetadata, pretty=True) - logging.debug('Metadata dict as xml generated: %s', s) - with open(path, 'w') as fp: + logging.debug("Metadata dict as xml generated: %s", s) + with open(path, "w") as fp: fp.write(s) return path def _client(url, username, password): """Instantiate a client to access the deposit api server Args: url (str): Deposit api server username (str): User password (str): User's password """ - client = PublicApiDepositClient({ - 'url': url, - 'auth': { - 'username': username, - 'password': password - }, - }) + client = PublicApiDepositClient( + {"url": url, "auth": {"username": username, "password": password},} + ) return client def _collection(client): """Retrieve the client's collection """ # retrieve user's collection sd_content = client.service_document() - if 'error' in sd_content: - raise InputError('Service document retrieval: %s' % ( - sd_content['error'], )) - collection = sd_content[ - 'service']['workspace']['collection']['sword:name'] + if "error" in sd_content: + raise InputError("Service document retrieval: %s" % (sd_content["error"],)) + collection = sd_content["service"]["workspace"]["collection"]["sword:name"] return collection def client_command_parse_input( - username, password, archive, metadata, - archive_deposit, metadata_deposit, - collection, slug, partial, deposit_id, replace, - url, name, authors, temp_dir): + username, + password, + archive, + metadata, + archive_deposit, + metadata_deposit, + collection, + slug, + partial, + deposit_id, + replace, + url, + name, + authors, + temp_dir, +): """Parse the client subcommand options and make sure the combination is acceptable*. If not, an InputError exception is raised explaining the issue. By acceptable, we mean: - A multipart deposit (create or update) requires: - an existing software archive - an existing metadata file or author(s) and name provided in params - A binary deposit (create/update) requires an existing software archive - A metadata deposit (create/update) requires an existing metadata file or author(s) and name provided in params - A deposit update requires a deposit_id This will not prevent all failure cases though. The remaining errors are already dealt with by the underlying api client. Raises: InputError explaining the issue Returns: dict with the following keys: 'archive': the software archive to deposit 'username': username 'password': associated password 'metadata': the metadata file to deposit 'collection': the username's associated client 'slug': the slug or external id identifying the deposit to make 'partial': if the deposit is partial or not 'client': instantiated class 'url': deposit's server main entry point 'deposit_type': deposit's type (binary, multipart, metadata) 'deposit_id': optional deposit identifier """ if archive_deposit and metadata_deposit: # too many flags use, remove redundant ones (-> multipart deposit) archive_deposit = False metadata_deposit = False if not slug: # generate one as this is mandatory slug = generate_slug() if not metadata: if name and authors: metadata = generate_metadata_file(name, slug, authors, temp_dir) elif not archive_deposit and not partial and not deposit_id: # If we meet all the following conditions: # * there is not an archive-only deposit # * it is not part of a multipart deposit (either create/update # or finish) # * it misses either name or authors raise InputError( "Either a metadata file (--metadata) or both --author and " "--name must be provided, unless this is an archive-only " - "deposit.") + "deposit." + ) elif name or authors: # If we are generating metadata, then all mandatory metadata # must be present raise InputError( "Either a metadata file (--metadata) or both --author and " - "--name must be provided.") + "--name must be provided." + ) else: # TODO: this is a multipart deposit, we might want to check that # metadata are deposited at some point pass elif name or authors: raise InputError( "Using a metadata file (--metadata) is incompatible with " - "--author and --name, which are used to generate one.") + "--author and --name, which are used to generate one." + ) if metadata_deposit: archive = None if archive_deposit: metadata = None if metadata_deposit and not metadata: raise InputError( "Metadata deposit must be provided for metadata " - "deposit (either a filepath or --name and --author)") + "deposit (either a filepath or --name and --author)" + ) if not archive and not metadata and partial: raise InputError( - 'Please provide an actionable command. See --help for more ' - 'information') + "Please provide an actionable command. See --help for more " "information" + ) if replace and not deposit_id: - raise InputError( - 'To update an existing deposit, you must provide its id') + raise InputError("To update an existing deposit, you must provide its id") client = _client(url, username, password) if not collection: collection = _collection(client) return { - 'archive': archive, - 'username': username, - 'password': password, - 'metadata': metadata, - 'collection': collection, - 'slug': slug, - 'in_progress': partial, - 'client': client, - 'url': url, - 'deposit_id': deposit_id, - 'replace': replace, + "archive": archive, + "username": username, + "password": password, + "metadata": metadata, + "collection": collection, + "slug": slug, + "in_progress": partial, + "client": client, + "url": url, + "deposit_id": deposit_id, + "replace": replace, } def _subdict(d, keys): - 'return a dict from d with only given keys' + "return a dict from d with only given keys" return {k: v for k, v in d.items() if k in keys} def deposit_create(config, logger): """Delegate the actual deposit to the deposit client. """ - logger.debug('Create deposit') + logger.debug("Create deposit") - client = config['client'] - keys = ('collection', 'archive', 'metadata', 'slug', 'in_progress') - return client.deposit_create( - **_subdict(config, keys)) + client = config["client"] + keys = ("collection", "archive", "metadata", "slug", "in_progress") + return client.deposit_create(**_subdict(config, keys)) def deposit_update(config, logger): """Delegate the actual deposit to the deposit client. """ - logger.debug('Update deposit') + logger.debug("Update deposit") - client = config['client'] - keys = ('collection', 'deposit_id', 'archive', 'metadata', - 'slug', 'in_progress', 'replace') - return client.deposit_update( - **_subdict(config, keys)) + client = config["client"] + keys = ( + "collection", + "deposit_id", + "archive", + "metadata", + "slug", + "in_progress", + "replace", + ) + return client.deposit_update(**_subdict(config, keys)) @deposit.command() -@click.option('--username', required=True, - help="(Mandatory) User's name") -@click.option('--password', required=True, - help="(Mandatory) User's associated password") -@click.option('--archive', type=click.Path(exists=True), - help='(Optional) Software archive to deposit') -@click.option('--metadata', type=click.Path(exists=True), - help="(Optional) Path to xml metadata file. If not provided, this will use a file named .metadata.xml") # noqa -@click.option('--archive-deposit/--no-archive-deposit', default=False, - help='(Optional) Software archive only deposit') -@click.option('--metadata-deposit/--no-metadata-deposit', default=False, - help='(Optional) Metadata only deposit') -@click.option('--collection', - help="(Optional) User's collection. If not provided, this will be fetched.") # noqa -@click.option('--slug', - help="""(Optional) External system information identifier. If not provided, it will be generated""") # noqa -@click.option('--partial/--no-partial', default=False, - help='(Optional) The deposit will be partial, other deposits will have to take place to finalize it.') # noqa -@click.option('--deposit-id', default=None, - help='(Optional) Update an existing partial deposit with its identifier') # noqa -@click.option('--replace/--no-replace', default=False, - help='(Optional) Update by replacing existing metadata to a deposit') # noqa -@click.option('--url', default='https://deposit.softwareheritage.org', - help="(Optional) Deposit server api endpoint. By default, https://deposit.softwareheritage.org/1") # noqa -@click.option('--verbose/--no-verbose', default=False, - help='Verbose mode') -@click.option('--name', - help='Software name') -@click.option('--author', multiple=True, - help='Software author(s), this can be repeated as many times' - ' as there are authors') -@click.option('-f', '--format', 'output_format', default='logging', - type=click.Choice(['logging', 'yaml', 'json']), - help='Output format results.') +@click.option("--username", required=True, help="(Mandatory) User's name") +@click.option( + "--password", required=True, help="(Mandatory) User's associated password" +) +@click.option( + "--archive", + type=click.Path(exists=True), + help="(Optional) Software archive to deposit", +) +@click.option( + "--metadata", + type=click.Path(exists=True), + help=( + "(Optional) Path to xml metadata file. If not provided, " + "this will use a file named .metadata.xml" + ), +) # noqa +@click.option( + "--archive-deposit/--no-archive-deposit", + default=False, + help="(Optional) Software archive only deposit", +) +@click.option( + "--metadata-deposit/--no-metadata-deposit", + default=False, + help="(Optional) Metadata only deposit", +) +@click.option( + "--collection", + help="(Optional) User's collection. If not provided, this will be fetched.", +) # noqa +@click.option( + "--slug", + help=( + "(Optional) External system information identifier. " + "If not provided, it will be generated" + ), +) # noqa +@click.option( + "--partial/--no-partial", + default=False, + help=( + "(Optional) The deposit will be partial, other deposits " + "will have to take place to finalize it." + ), +) # noqa +@click.option( + "--deposit-id", + default=None, + help="(Optional) Update an existing partial deposit with its identifier", +) # noqa +@click.option( + "--replace/--no-replace", + default=False, + help="(Optional) Update by replacing existing metadata to a deposit", +) # noqa +@click.option( + "--url", + default="https://deposit.softwareheritage.org", + help=( + "(Optional) Deposit server api endpoint. By default, " + "https://deposit.softwareheritage.org/1" + ), +) # noqa +@click.option("--verbose/--no-verbose", default=False, help="Verbose mode") +@click.option("--name", help="Software name") +@click.option( + "--author", + multiple=True, + help="Software author(s), this can be repeated as many times" + " as there are authors", +) +@click.option( + "-f", + "--format", + "output_format", + default="logging", + type=click.Choice(["logging", "yaml", "json"]), + help="Output format results.", +) @click.pass_context -def upload(ctx, - username, password, archive=None, metadata=None, - archive_deposit=False, metadata_deposit=False, - collection=None, slug=None, partial=False, deposit_id=None, - replace=False, url='https://deposit.softwareheritage.org', - verbose=False, name=None, author=None, output_format=None): +def upload( + ctx, + username, + password, + archive=None, + metadata=None, + archive_deposit=False, + metadata_deposit=False, + collection=None, + slug=None, + partial=False, + deposit_id=None, + replace=False, + url="https://deposit.softwareheritage.org", + verbose=False, + name=None, + author=None, + output_format=None, +): """Software Heritage Public Deposit Client Create/Update deposit through the command line. More documentation can be found at https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html. """ url = _url(url) config = {} with tempfile.TemporaryDirectory() as temp_dir: try: - logger.debug('Parsing cli options') + logger.debug("Parsing cli options") config = client_command_parse_input( - username, password, archive, metadata, archive_deposit, - metadata_deposit, collection, slug, partial, deposit_id, - replace, url, name, author, temp_dir) + username, + password, + archive, + metadata, + archive_deposit, + metadata_deposit, + collection, + slug, + partial, + deposit_id, + replace, + url, + name, + author, + temp_dir, + ) except InputError as e: - logger.error('Problem during parsing options: %s', e) + logger.error("Problem during parsing options: %s", e) sys.exit(1) if verbose: - logger.info("Parsed configuration: %s" % ( - config, )) + logger.info("Parsed configuration: %s" % (config,)) - deposit_id = config['deposit_id'] + deposit_id = config["deposit_id"] if deposit_id: r = deposit_update(config, logger) else: r = deposit_create(config, logger) print_result(r, output_format) @deposit.command() -@click.option('--url', default='https://deposit.softwareheritage.org', - help="(Optional) Deposit server api endpoint. By default, " - "https://deposit.softwareheritage.org/1") -@click.option('--username', required=True, - help="(Mandatory) User's name") -@click.option('--password', required=True, - help="(Mandatory) User's associated password") -@click.option('--deposit-id', default=None, - required=True, - help="Deposit identifier.") -@click.option('-f', '--format', 'output_format', default='logging', - type=click.Choice(['logging', 'yaml', 'json']), - help='Output format results.') +@click.option( + "--url", + default="https://deposit.softwareheritage.org", + help="(Optional) Deposit server api endpoint. By default, " + "https://deposit.softwareheritage.org/1", +) +@click.option("--username", required=True, help="(Mandatory) User's name") +@click.option( + "--password", required=True, help="(Mandatory) User's associated password" +) +@click.option("--deposit-id", default=None, required=True, help="Deposit identifier.") +@click.option( + "-f", + "--format", + "output_format", + default="logging", + type=click.Choice(["logging", "yaml", "json"]), + help="Output format results.", +) @click.pass_context def status(ctx, url, username, password, deposit_id, output_format): """Deposit's status """ url = _url(url) - logger.debug('Status deposit') + logger.debug("Status deposit") try: client = _client(url, username, password) collection = _collection(client) except InputError as e: - logger.error('Problem during parsing options: %s', e) + logger.error("Problem during parsing options: %s", e) sys.exit(1) - print_result(client.deposit_status( - collection=collection, deposit_id=deposit_id), - output_format) + print_result( + client.deposit_status(collection=collection, deposit_id=deposit_id), + output_format, + ) def print_result(data, output_format): - if output_format == 'json': + if output_format == "json": click.echo(json.dumps(data)) - elif output_format == 'yaml': + elif output_format == "yaml": click.echo(yaml.dump(data)) else: logger.info(data) diff --git a/swh/deposit/client.py b/swh/deposit/client.py index 58517c3c..3ac6d732 100644 --- a/swh/deposit/client.py +++ b/swh/deposit/client.py @@ -1,581 +1,639 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of defining an swh-deposit client """ import hashlib import os import requests import xmltodict import logging from abc import ABCMeta, abstractmethod from urllib.parse import urljoin from swh.core.config import SWHConfig logger = logging.getLogger(__name__) -def _parse(stream, encoding='utf-8'): +def _parse(stream, encoding="utf-8"): """Given a xml stream, parse the result. Args: stream (bytes/text): The stream to parse encoding (str): The encoding to use if to decode the bytes stream Returns: A dict of values corresponding to the parsed xml """ if isinstance(stream, bytes): stream = stream.decode(encoding) data = xmltodict.parse(stream, encoding=encoding, process_namespaces=False) - if 'entry' in data: - data = data['entry'] - if 'sword:error' in data: - data = data['sword:error'] + if "entry" in data: + data = data["entry"] + if "sword:error" in data: + data = data["sword:error"] return dict(data) -def _parse_with_filter(stream, encoding='utf-8', keys=[]): +def _parse_with_filter(stream, encoding="utf-8", keys=[]): """Given a xml stream, parse the result and filter with keys. Args: stream (bytes/text): The stream to parse encoding (str): The encoding to use if to decode the bytes stream keys ([str]): Keys to filter the parsed result Returns: A dict of values corresponding to the parsed xml filtered by the keys provided. """ data = _parse(stream, encoding=encoding) m = {} for key in keys: m[key] = data.get(key) return m class BaseApiDepositClient(SWHConfig): """Deposit client base class """ - CONFIG_BASE_FILENAME = 'deposit/client' + + CONFIG_BASE_FILENAME = "deposit/client" DEFAULT_CONFIG = { - 'url': ('str', 'http://localhost:5006'), - 'auth': ('dict', {}), # with optional 'username'/'password' keys + "url": ("str", "http://localhost:5006"), + "auth": ("dict", {}), # with optional 'username'/'password' keys } def __init__(self, config=None, _client=requests): super().__init__() if config is None: self.config = super().parse_config_file() else: self.config = config self._client = _client - self.base_url = self.config['url'].strip('/') + '/' - auth = self.config['auth'] + self.base_url = self.config["url"].strip("/") + "/" + auth = self.config["auth"] if auth == {}: self.auth = None else: - self.auth = (auth['username'], auth['password']) + self.auth = (auth["username"], auth["password"]) def do(self, method, url, *args, **kwargs): """Internal method to deal with requests, possibly with basic http authentication. Args: method (str): supported http methods as in self._methods' keys Returns: The request's execution """ if hasattr(self._client, method): method_fn = getattr(self._client, method) else: - raise ValueError('Development error, unsupported method %s' % ( - method)) + raise ValueError("Development error, unsupported method %s" % (method)) if self.auth: - kwargs['auth'] = self.auth + kwargs["auth"] = self.auth - full_url = urljoin(self.base_url, url.lstrip('/')) + full_url = urljoin(self.base_url, url.lstrip("/")) return method_fn(full_url, *args, **kwargs) class PrivateApiDepositClient(BaseApiDepositClient): """Private API deposit client to: - read a given deposit's archive(s) - read a given deposit's metadata - update a given deposit's status """ + def archive_get(self, archive_update_url, archive): """Retrieve the archive from the deposit to a local directory. Args: archive_update_url (str): The full deposit archive(s)'s raw content to retrieve locally archive (str): the local archive's path where to store the raw content Returns: The archive path to the local archive to load. Or None if any problem arose. """ - r = self.do('get', archive_update_url, stream=True) + r = self.do("get", archive_update_url, stream=True) if r.ok: - with open(archive, 'wb') as f: + with open(archive, "wb") as f: for chunk in r.iter_content(): f.write(chunk) return archive - msg = 'Problem when retrieving deposit archive at %s' % ( - archive_update_url, ) + msg = "Problem when retrieving deposit archive at %s" % (archive_update_url,) logger.error(msg) raise ValueError(msg) def metadata_get(self, metadata_url): """Retrieve the metadata information on a given deposit. Args: metadata_url (str): The full deposit metadata url to retrieve locally Returns: The dictionary of metadata for that deposit or None if any problem arose. """ - r = self.do('get', metadata_url) + r = self.do("get", metadata_url) if r.ok: return r.json() - msg = 'Problem when retrieving metadata at %s' % metadata_url + msg = "Problem when retrieving metadata at %s" % metadata_url logger.error(msg) raise ValueError(msg) - def status_update(self, update_status_url, status, - revision_id=None, directory_id=None, origin_url=None): + def status_update( + self, + update_status_url, + status, + revision_id=None, + directory_id=None, + origin_url=None, + ): """Update the deposit's status. Args: update_status_url (str): the full deposit's archive status (str): The status to update the deposit with revision_id (str/None): the revision's identifier to update to directory_id (str/None): the directory's identifier to update to origin_url (str/None): deposit's associated origin url """ - payload = {'status': status} + payload = {"status": status} if revision_id: - payload['revision_id'] = revision_id + payload["revision_id"] = revision_id if directory_id: - payload['directory_id'] = directory_id + payload["directory_id"] = directory_id if origin_url: - payload['origin_url'] = origin_url + payload["origin_url"] = origin_url - self.do('put', update_status_url, json=payload) + self.do("put", update_status_url, json=payload) def check(self, check_url): """Check the deposit's associated data (metadata, archive(s)) Args: check_url (str): the full deposit's check url """ - r = self.do('get', check_url) + r = self.do("get", check_url) if r.ok: data = r.json() - return data['status'] + return data["status"] - msg = 'Problem when checking deposit %s' % check_url + msg = "Problem when checking deposit %s" % check_url logger.error(msg) raise ValueError(msg) class BaseDepositClient(BaseApiDepositClient, metaclass=ABCMeta): """Base Deposit client to access the public api. """ + def __init__(self, config, error_msg=None, empty_result={}): super().__init__(config) self.error_msg = error_msg self.empty_result = empty_result @abstractmethod def compute_url(self, *args, **kwargs): """Compute api url endpoint to query.""" pass @abstractmethod def compute_method(self, *args, **kwargs): """Http method to use on the url""" pass @abstractmethod def parse_result_ok(self, xml_content): """Given an xml result from the api endpoint, parse it and returns a dict. """ pass def compute_information(self, *args, **kwargs): """Compute some more information given the inputs (e.g http headers, ...) """ return {} def parse_result_error(self, xml_content): """Given an error response in xml, parse it into a dict. Returns: dict with following keys: 'error': The error message 'detail': Some more detail about the error if any """ - return _parse_with_filter(xml_content, keys=[ - 'summary', 'detail', 'sword:verboseDescription']) + return _parse_with_filter( + xml_content, keys=["summary", "detail", "sword:verboseDescription"] + ) def do_execute(self, method, url, info): """Execute the http query to url using method and info information. By default, execute a simple query to url with the http method. Override this in daughter class to improve the default behavior if needed. """ return self.do(method, url) def execute(self, *args, **kwargs): """Main endpoint to prepare and execute the http query to the api. """ url = self.compute_url(*args, **kwargs) method = self.compute_method(*args, **kwargs) info = self.compute_information(*args, **kwargs) try: r = self.do_execute(method, url, info) except Exception as e: msg = self.error_msg % (url, e) r = self.empty_result - r.update({ - 'error': msg, - }) + r.update( + {"error": msg,} + ) return r else: if r.ok: if int(r.status_code) == 204: # 204 returns no body - return {'status': r.status_code} + return {"status": r.status_code} else: return self.parse_result_ok(r.text) else: error = self.parse_result_error(r.text) empty = self.empty_result error.update(empty) - error.update({ - 'status': r.status_code, - }) + error.update( + {"status": r.status_code,} + ) return error class ServiceDocumentDepositClient(BaseDepositClient): """Service Document information retrieval. """ + def __init__(self, config): - super().__init__(config, - error_msg='Service document failure at %s: %s', - empty_result={'collection': None}) + super().__init__( + config, + error_msg="Service document failure at %s: %s", + empty_result={"collection": None}, + ) def compute_url(self, *args, **kwargs): - return '/servicedocument/' + return "/servicedocument/" def compute_method(self, *args, **kwargs): - return 'get' + return "get" def parse_result_ok(self, xml_content): """Parse service document's success response. """ return _parse(xml_content) class StatusDepositClient(BaseDepositClient): """Status information on a deposit. """ + def __init__(self, config): - super().__init__(config, - error_msg='Status check failure at %s: %s', - empty_result={ - 'deposit_status': None, - 'deposit_status_detail': None, - 'deposit_swh_id': None, - }) + super().__init__( + config, + error_msg="Status check failure at %s: %s", + empty_result={ + "deposit_status": None, + "deposit_status_detail": None, + "deposit_swh_id": None, + }, + ) def compute_url(self, collection, deposit_id): - return '/%s/%s/status/' % (collection, deposit_id) + return "/%s/%s/status/" % (collection, deposit_id) def compute_method(self, *args, **kwargs): - return 'get' + return "get" def parse_result_ok(self, xml_content): """Given an xml content as string, returns a deposit dict. """ - return _parse_with_filter(xml_content, keys=[ - 'deposit_id', - 'deposit_status', - 'deposit_status_detail', - 'deposit_swh_id', - 'deposit_swh_id_context', - 'deposit_swh_anchor_id', - 'deposit_swh_anchor_id_context', - 'deposit_external_id', - ]) + return _parse_with_filter( + xml_content, + keys=[ + "deposit_id", + "deposit_status", + "deposit_status_detail", + "deposit_swh_id", + "deposit_swh_id_context", + "deposit_swh_anchor_id", + "deposit_swh_anchor_id_context", + "deposit_external_id", + ], + ) class BaseCreateDepositClient(BaseDepositClient): """Deposit client base class to post new deposit. """ + def __init__(self, config): - super().__init__(config, - error_msg='Post Deposit failure at %s: %s', - empty_result={ - 'deposit_id': None, - 'deposit_status': None, - }) + super().__init__( + config, + error_msg="Post Deposit failure at %s: %s", + empty_result={"deposit_id": None, "deposit_status": None,}, + ) def compute_url(self, collection, *args, **kwargs): - return '/%s/' % collection + return "/%s/" % collection def compute_method(self, *args, **kwargs): - return 'post' + return "post" def parse_result_ok(self, xml_content): """Given an xml content as string, returns a deposit dict. """ - return _parse_with_filter(xml_content, keys=['deposit_id', - 'deposit_status', - 'deposit_status_detail', - 'deposit_date']) - - def _compute_information(self, collection, filepath, in_progress, slug, - is_archive=True): + return _parse_with_filter( + xml_content, + keys=[ + "deposit_id", + "deposit_status", + "deposit_status_detail", + "deposit_date", + ], + ) + + def _compute_information( + self, collection, filepath, in_progress, slug, is_archive=True + ): """Given a filepath, compute necessary information on that file. Args: filepath (str): Path to a file is_archive (bool): is it an archive or not? Returns: dict with keys: 'content-type': content type associated 'md5sum': md5 sum 'filename': filename """ filename = os.path.basename(filepath) if is_archive: - md5sum = hashlib.md5(open(filepath, 'rb').read()).hexdigest() - extension = filename.split('.')[-1] - if 'zip' in extension: - content_type = 'application/zip' + md5sum = hashlib.md5(open(filepath, "rb").read()).hexdigest() + extension = filename.split(".")[-1] + if "zip" in extension: + content_type = "application/zip" else: - content_type = 'application/x-tar' + content_type = "application/x-tar" else: content_type = None md5sum = None return { - 'slug': slug, - 'in_progress': in_progress, - 'content-type': content_type, - 'md5sum': md5sum, - 'filename': filename, - 'filepath': filepath, + "slug": slug, + "in_progress": in_progress, + "content-type": content_type, + "md5sum": md5sum, + "filename": filename, + "filepath": filepath, } - def compute_information(self, collection, filepath, in_progress, slug, - is_archive=True, **kwargs): - info = self._compute_information(collection, filepath, in_progress, - slug, is_archive=is_archive) - info['headers'] = self.compute_headers(info) + def compute_information( + self, collection, filepath, in_progress, slug, is_archive=True, **kwargs + ): + info = self._compute_information( + collection, filepath, in_progress, slug, is_archive=is_archive + ) + info["headers"] = self.compute_headers(info) return info def do_execute(self, method, url, info): - with open(info['filepath'], 'rb') as f: - return self.do(method, url, data=f, headers=info['headers']) + with open(info["filepath"], "rb") as f: + return self.do(method, url, data=f, headers=info["headers"]) class CreateArchiveDepositClient(BaseCreateDepositClient): """Post an archive (binary) deposit client.""" + def compute_headers(self, info): return { - 'SLUG': info['slug'], - 'CONTENT_MD5': info['md5sum'], - 'IN-PROGRESS': str(info['in_progress']), - 'CONTENT-TYPE': info['content-type'], - 'CONTENT-DISPOSITION': 'attachment; filename=%s' % ( - info['filename'], ), + "SLUG": info["slug"], + "CONTENT_MD5": info["md5sum"], + "IN-PROGRESS": str(info["in_progress"]), + "CONTENT-TYPE": info["content-type"], + "CONTENT-DISPOSITION": "attachment; filename=%s" % (info["filename"],), } class UpdateArchiveDepositClient(CreateArchiveDepositClient): """Update (add/replace) an archive (binary) deposit client.""" + def compute_url(self, collection, *args, deposit_id=None, **kwargs): - return '/%s/%s/media/' % (collection, deposit_id) + return "/%s/%s/media/" % (collection, deposit_id) def compute_method(self, *args, replace=False, **kwargs): - return 'put' if replace else 'post' + return "put" if replace else "post" class CreateMetadataDepositClient(BaseCreateDepositClient): """Post a metadata deposit client.""" + def compute_headers(self, info): return { - 'SLUG': info['slug'], - 'IN-PROGRESS': str(info['in_progress']), - 'CONTENT-TYPE': 'application/atom+xml;type=entry', + "SLUG": info["slug"], + "IN-PROGRESS": str(info["in_progress"]), + "CONTENT-TYPE": "application/atom+xml;type=entry", } class UpdateMetadataDepositClient(CreateMetadataDepositClient): """Update (add/replace) a metadata deposit client.""" + def compute_url(self, collection, *args, deposit_id=None, **kwargs): - return '/%s/%s/metadata/' % (collection, deposit_id) + return "/%s/%s/metadata/" % (collection, deposit_id) def compute_method(self, *args, replace=False, **kwargs): - return 'put' if replace else 'post' + return "put" if replace else "post" class CreateMultipartDepositClient(BaseCreateDepositClient): """Create a multipart deposit client.""" + def _multipart_info(self, info, info_meta): files = [ - ('file', - (info['filename'], - open(info['filepath'], 'rb'), - info['content-type'])), - ('atom', - (info_meta['filename'], - open(info_meta['filepath'], 'rb'), - 'application/atom+xml')), + ( + "file", + (info["filename"], open(info["filepath"], "rb"), info["content-type"]), + ), + ( + "atom", + ( + info_meta["filename"], + open(info_meta["filepath"], "rb"), + "application/atom+xml", + ), + ), ] headers = { - 'SLUG': info['slug'], - 'CONTENT_MD5': info['md5sum'], - 'IN-PROGRESS': str(info['in_progress']), + "SLUG": info["slug"], + "CONTENT_MD5": info["md5sum"], + "IN-PROGRESS": str(info["in_progress"]), } return files, headers - def compute_information(self, collection, archive, metadata, - in_progress, slug, **kwargs): - info = self._compute_information( - collection, archive, in_progress, slug) + def compute_information( + self, collection, archive, metadata, in_progress, slug, **kwargs + ): + info = self._compute_information(collection, archive, in_progress, slug) info_meta = self._compute_information( - collection, metadata, in_progress, slug, is_archive=False) + collection, metadata, in_progress, slug, is_archive=False + ) files, headers = self._multipart_info(info, info_meta) - return {'files': files, 'headers': headers} + return {"files": files, "headers": headers} def do_execute(self, method, url, info): - return self.do( - method, url, files=info['files'], headers=info['headers']) + return self.do(method, url, files=info["files"], headers=info["headers"]) class UpdateMultipartDepositClient(CreateMultipartDepositClient): """Update a multipart deposit client.""" + def compute_url(self, collection, *args, deposit_id=None, **kwargs): - return '/%s/%s/metadata/' % (collection, deposit_id) + return "/%s/%s/metadata/" % (collection, deposit_id) def compute_method(self, *args, replace=False, **kwargs): - return 'put' if replace else 'post' + return "put" if replace else "post" class PublicApiDepositClient(BaseApiDepositClient): """Public api deposit client.""" + def service_document(self): """Retrieve service document endpoint's information.""" return ServiceDocumentDepositClient(self.config).execute() def deposit_status(self, collection, deposit_id): """Retrieve status information on a deposit.""" - return StatusDepositClient(self.config).execute( - collection, deposit_id) + return StatusDepositClient(self.config).execute(collection, deposit_id) - def deposit_create(self, collection, slug, archive=None, - metadata=None, in_progress=False): + def deposit_create( + self, collection, slug, archive=None, metadata=None, in_progress=False + ): """Create a new deposit (archive, metadata, both as multipart).""" if archive and not metadata: return CreateArchiveDepositClient(self.config).execute( - collection, archive, in_progress, slug) + collection, archive, in_progress, slug + ) elif not archive and metadata: return CreateMetadataDepositClient(self.config).execute( - collection, metadata, in_progress, slug, - is_archive=False) + collection, metadata, in_progress, slug, is_archive=False + ) else: return CreateMultipartDepositClient(self.config).execute( - collection, archive, metadata, in_progress, - slug) - - def deposit_update(self, collection, deposit_id, slug, archive=None, - metadata=None, in_progress=False, - replace=False): + collection, archive, metadata, in_progress, slug + ) + + def deposit_update( + self, + collection, + deposit_id, + slug, + archive=None, + metadata=None, + in_progress=False, + replace=False, + ): """Update (add/replace) existing deposit (archive, metadata, both).""" r = self.deposit_status(collection, deposit_id) - if 'error' in r: + if "error" in r: return r - status = r['deposit_status'] - if status != 'partial': + status = r["deposit_status"] + if status != "partial": return { - 'error': "You can only act on deposit with status 'partial'", - 'detail': "The deposit %s has status '%s'" % ( - deposit_id, status), - 'deposit_status': status, - 'deposit_id': deposit_id, + "error": "You can only act on deposit with status 'partial'", + "detail": "The deposit %s has status '%s'" % (deposit_id, status), + "deposit_status": status, + "deposit_id": deposit_id, } if archive and not metadata: r = UpdateArchiveDepositClient(self.config).execute( - collection, archive, in_progress, slug, - deposit_id=deposit_id, replace=replace) + collection, + archive, + in_progress, + slug, + deposit_id=deposit_id, + replace=replace, + ) elif not archive and metadata: r = UpdateMetadataDepositClient(self.config).execute( - collection, metadata, in_progress, slug, - deposit_id=deposit_id, replace=replace) + collection, + metadata, + in_progress, + slug, + deposit_id=deposit_id, + replace=replace, + ) else: r = UpdateMultipartDepositClient(self.config).execute( - collection, archive, metadata, in_progress, - slug, deposit_id=deposit_id, replace=replace) - - if 'error' in r: + collection, + archive, + metadata, + in_progress, + slug, + deposit_id=deposit_id, + replace=replace, + ) + + if "error" in r: return r return self.deposit_status(collection, deposit_id) diff --git a/swh/deposit/config.py b/swh/deposit/config.py index 912fb602..410370d0 100644 --- a/swh/deposit/config.py +++ b/swh/deposit/config.py @@ -1,111 +1,110 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import logging from typing import Any, Dict, Tuple from swh.core.config import SWHConfig from swh.scheduler import get_scheduler # IRIs (Internationalized Resource identifier) sword 2.0 specified -EDIT_SE_IRI = 'edit_se_iri' -EM_IRI = 'em_iri' -CONT_FILE_IRI = 'cont_file_iri' -SD_IRI = 'servicedocument' -COL_IRI = 'upload' -STATE_IRI = 'state_iri' -PRIVATE_GET_RAW_CONTENT = 'private-download' -PRIVATE_CHECK_DEPOSIT = 'check-deposit' -PRIVATE_PUT_DEPOSIT = 'private-update' -PRIVATE_GET_DEPOSIT_METADATA = 'private-read' -PRIVATE_LIST_DEPOSITS = 'private-deposit-list' - -ARCHIVE_KEY = 'archive' -METADATA_KEY = 'metadata' -RAW_METADATA_KEY = 'raw-metadata' - -ARCHIVE_TYPE = 'archive' -METADATA_TYPE = 'metadata' - - -AUTHORIZED_PLATFORMS = ['development', 'production', 'testing'] - -DEPOSIT_STATUS_REJECTED = 'rejected' -DEPOSIT_STATUS_PARTIAL = 'partial' -DEPOSIT_STATUS_DEPOSITED = 'deposited' -DEPOSIT_STATUS_VERIFIED = 'verified' -DEPOSIT_STATUS_LOAD_SUCCESS = 'done' -DEPOSIT_STATUS_LOAD_FAILURE = 'failed' +EDIT_SE_IRI = "edit_se_iri" +EM_IRI = "em_iri" +CONT_FILE_IRI = "cont_file_iri" +SD_IRI = "servicedocument" +COL_IRI = "upload" +STATE_IRI = "state_iri" +PRIVATE_GET_RAW_CONTENT = "private-download" +PRIVATE_CHECK_DEPOSIT = "check-deposit" +PRIVATE_PUT_DEPOSIT = "private-update" +PRIVATE_GET_DEPOSIT_METADATA = "private-read" +PRIVATE_LIST_DEPOSITS = "private-deposit-list" + +ARCHIVE_KEY = "archive" +METADATA_KEY = "metadata" +RAW_METADATA_KEY = "raw-metadata" + +ARCHIVE_TYPE = "archive" +METADATA_TYPE = "metadata" + + +AUTHORIZED_PLATFORMS = ["development", "production", "testing"] + +DEPOSIT_STATUS_REJECTED = "rejected" +DEPOSIT_STATUS_PARTIAL = "partial" +DEPOSIT_STATUS_DEPOSITED = "deposited" +DEPOSIT_STATUS_VERIFIED = "verified" +DEPOSIT_STATUS_LOAD_SUCCESS = "done" +DEPOSIT_STATUS_LOAD_FAILURE = "failed" # Revision author for deposit SWH_PERSON = { - 'name': 'Software Heritage', - 'fullname': 'Software Heritage', - 'email': 'robot@softwareheritage.org' + "name": "Software Heritage", + "fullname": "Software Heritage", + "email": "robot@softwareheritage.org", } def setup_django_for(platform=None, config_file=None): """Setup function for command line tools (swh.deposit.create_user) to initialize the needed db access. Note: Do not import any django related module prior to this function call. Otherwise, this will raise an django.core.exceptions.ImproperlyConfigured error message. Args: platform (str): the platform the scheduling is running config_file (str): Extra configuration file (typically for the production platform) Raises: ValueError in case of wrong platform inputs. """ if platform is not None: if platform not in AUTHORIZED_PLATFORMS: - raise ValueError('Platform should be one of %s' % - AUTHORIZED_PLATFORMS) - if 'DJANGO_SETTINGS_MODULE' not in os.environ: - os.environ['DJANGO_SETTINGS_MODULE'] = ( - 'swh.deposit.settings.%s' % platform) + raise ValueError("Platform should be one of %s" % AUTHORIZED_PLATFORMS) + if "DJANGO_SETTINGS_MODULE" not in os.environ: + os.environ["DJANGO_SETTINGS_MODULE"] = "swh.deposit.settings.%s" % platform if config_file: - os.environ.setdefault('SWH_CONFIG_FILENAME', config_file) + os.environ.setdefault("SWH_CONFIG_FILENAME", config_file) import django + django.setup() class SWHDefaultConfig(SWHConfig): """Mixin intended to enrich views with SWH configuration. """ - CONFIG_BASE_FILENAME = 'deposit/server' + + CONFIG_BASE_FILENAME = "deposit/server" DEFAULT_CONFIG = { - 'max_upload_size': ('int', 209715200), - 'checks': ('bool', True), - 'scheduler': ('dict', { - 'cls': 'remote', - 'args': { - 'url': 'http://localhost:5008/' - } - }) + "max_upload_size": ("int", 209715200), + "checks": ("bool", True), + "scheduler": ( + "dict", + {"cls": "remote", "args": {"url": "http://localhost:5008/"}}, + ), } ADDITIONAL_CONFIG = {} # type: Dict[str, Tuple[str, Any]] def __init__(self, **config): super().__init__() self.config = self.parse_config_file( - additional_configs=[self.ADDITIONAL_CONFIG]) + additional_configs=[self.ADDITIONAL_CONFIG] + ) self.config.update(config) - self.log = logging.getLogger('swh.deposit') - if self.config.get('scheduler'): - self.scheduler = get_scheduler(**self.config['scheduler']) + self.log = logging.getLogger("swh.deposit") + if self.config.get("scheduler"): + self.scheduler = get_scheduler(**self.config["scheduler"]) diff --git a/swh/deposit/errors.py b/swh/deposit/errors.py index bd51a451..68cc346b 100644 --- a/swh/deposit/errors.py +++ b/swh/deposit/errors.py @@ -1,147 +1,151 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of providing the standard sword errors """ from rest_framework import status from django.shortcuts import render -FORBIDDEN = 'forbidden' -UNAUTHORIZED = 'unauthorized' -NOT_FOUND = 'unknown' -BAD_REQUEST = 'bad-request' -ERROR_CONTENT = 'error-content' -CHECKSUM_MISMATCH = 'checksum-mismatch' -MEDIATION_NOT_ALLOWED = 'mediation-not-allowed' -METHOD_NOT_ALLOWED = 'method-not-allowed' -MAX_UPLOAD_SIZE_EXCEEDED = 'max_upload_size_exceeded' -PARSING_ERROR = 'parsing-error' +FORBIDDEN = "forbidden" +UNAUTHORIZED = "unauthorized" +NOT_FOUND = "unknown" +BAD_REQUEST = "bad-request" +ERROR_CONTENT = "error-content" +CHECKSUM_MISMATCH = "checksum-mismatch" +MEDIATION_NOT_ALLOWED = "mediation-not-allowed" +METHOD_NOT_ALLOWED = "method-not-allowed" +MAX_UPLOAD_SIZE_EXCEEDED = "max_upload_size_exceeded" +PARSING_ERROR = "parsing-error" class ParserError(ValueError): """Specific parsing error detected when parsing the xml metadata input """ + pass ERRORS = { FORBIDDEN: { - 'status': status.HTTP_403_FORBIDDEN, - 'iri': 'http://purl.org/net/sword/error/ErrorForbidden', - 'tag': 'sword:ErrorForbidden', + "status": status.HTTP_403_FORBIDDEN, + "iri": "http://purl.org/net/sword/error/ErrorForbidden", + "tag": "sword:ErrorForbidden", }, UNAUTHORIZED: { - 'status': status.HTTP_401_UNAUTHORIZED, - 'iri': 'http://purl.org/net/sword/error/ErrorUnauthorized', - 'tag': 'sword:ErrorUnauthorized', + "status": status.HTTP_401_UNAUTHORIZED, + "iri": "http://purl.org/net/sword/error/ErrorUnauthorized", + "tag": "sword:ErrorUnauthorized", }, NOT_FOUND: { - 'status': status.HTTP_404_NOT_FOUND, - 'iri': 'http://purl.org/net/sword/error/ErrorNotFound', - 'tag': 'sword:ErrorNotFound', + "status": status.HTTP_404_NOT_FOUND, + "iri": "http://purl.org/net/sword/error/ErrorNotFound", + "tag": "sword:ErrorNotFound", }, ERROR_CONTENT: { - 'status': status.HTTP_415_UNSUPPORTED_MEDIA_TYPE, - 'iri': 'http://purl.org/net/sword/error/ErrorContent', - 'tag': 'sword:ErrorContent', + "status": status.HTTP_415_UNSUPPORTED_MEDIA_TYPE, + "iri": "http://purl.org/net/sword/error/ErrorContent", + "tag": "sword:ErrorContent", }, CHECKSUM_MISMATCH: { - 'status': status.HTTP_412_PRECONDITION_FAILED, - 'iri': 'http://purl.org/net/sword/error/ErrorChecksumMismatch', - 'tag': 'sword:ErrorChecksumMismatch', + "status": status.HTTP_412_PRECONDITION_FAILED, + "iri": "http://purl.org/net/sword/error/ErrorChecksumMismatch", + "tag": "sword:ErrorChecksumMismatch", }, BAD_REQUEST: { - 'status': status.HTTP_400_BAD_REQUEST, - 'iri': 'http://purl.org/net/sword/error/ErrorBadRequest', - 'tag': 'sword:ErrorBadRequest', + "status": status.HTTP_400_BAD_REQUEST, + "iri": "http://purl.org/net/sword/error/ErrorBadRequest", + "tag": "sword:ErrorBadRequest", }, PARSING_ERROR: { - 'status': status.HTTP_400_BAD_REQUEST, - 'iri': 'http://purl.org/net/sword/error/ErrorBadRequest', - 'tag': 'sword:ErrorBadRequest', + "status": status.HTTP_400_BAD_REQUEST, + "iri": "http://purl.org/net/sword/error/ErrorBadRequest", + "tag": "sword:ErrorBadRequest", }, MEDIATION_NOT_ALLOWED: { - 'status': status.HTTP_412_PRECONDITION_FAILED, - 'iri': 'http://purl.org/net/sword/error/MediationNotAllowed', - 'tag': 'sword:MediationNotAllowed', + "status": status.HTTP_412_PRECONDITION_FAILED, + "iri": "http://purl.org/net/sword/error/MediationNotAllowed", + "tag": "sword:MediationNotAllowed", }, METHOD_NOT_ALLOWED: { - 'status': status.HTTP_405_METHOD_NOT_ALLOWED, - 'iri': 'http://purl.org/net/sword/error/MethodNotAllowed', - 'tag': 'sword:MethodNotAllowed', + "status": status.HTTP_405_METHOD_NOT_ALLOWED, + "iri": "http://purl.org/net/sword/error/MethodNotAllowed", + "tag": "sword:MethodNotAllowed", }, MAX_UPLOAD_SIZE_EXCEEDED: { - 'status': status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, - 'iri': 'http://purl.org/net/sword/error/MaxUploadSizeExceeded', - 'tag': 'sword:MaxUploadSizeExceeded', + "status": status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, + "iri": "http://purl.org/net/sword/error/MaxUploadSizeExceeded", + "tag": "sword:MaxUploadSizeExceeded", }, } def make_error_dict(key, summary=None, verbose_description=None): """Utility function to factorize error message dictionary. Args: key (str): Error status key referenced in swh.deposit.errors module summary (str/None): Error message clarifying the status verbose_description (str/None): A more verbose description or work around a potential problem. Returns: Dictionary with key 'error' detailing the 'status' and associated 'message' """ return { - 'error': { - 'key': key, - 'summary': summary, - 'verboseDescription': verbose_description, + "error": { + "key": key, + "summary": summary, + "verboseDescription": verbose_description, }, } def make_error_response_from_dict(req, error): """Utility function to return an http response with error detail. Args: req (Request): original request error (dict): Error described as dict, typically generated from the make_error_dict function. Returns: HttpResponse with detailed error. """ - error_information = ERRORS[error['key']] + error_information = ERRORS[error["key"]] context = error context.update(error_information) - return render(req, 'deposit/error.xml', - context=error, - content_type='application/xml', - status=error_information['status']) + return render( + req, + "deposit/error.xml", + context=error, + content_type="application/xml", + status=error_information["status"], + ) def make_error_response(req, key, summary=None, verbose_description=None): """Utility function to create an http response with detailed error. Args: req (Request): original request key (str): Error status key referenced in swh.deposit.errors module summary (str): Error message clarifying the status verbose_description (str / None): A more verbose description or work around a potential problem. Returns: Dictionary with key 'error' detailing the 'status' and associated 'message' """ error = make_error_dict(key, summary, verbose_description) - return make_error_response_from_dict(req, error['error']) + return make_error_response_from_dict(req, error["error"]) diff --git a/swh/deposit/loader/__init__.py b/swh/deposit/loader/__init__.py index 6e1af4af..afbb69d1 100644 --- a/swh/deposit/loader/__init__.py +++ b/swh/deposit/loader/__init__.py @@ -1,7 +1,7 @@ # Copyright (C) 2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information def register(): - return {'task_modules': ['%s.tasks' % __name__]} + return {"task_modules": ["%s.tasks" % __name__]} diff --git a/swh/deposit/loader/checker.py b/swh/deposit/loader/checker.py index a5a1c798..17861ac7 100644 --- a/swh/deposit/loader/checker.py +++ b/swh/deposit/loader/checker.py @@ -1,35 +1,35 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging from typing import Mapping from swh.deposit.client import PrivateApiDepositClient logger = logging.getLogger(__name__) -class DepositChecker(): +class DepositChecker: """Deposit checker implementation. Trigger deposit's checks through the private api. """ + def __init__(self, client=None): super().__init__() self.client = client if client else PrivateApiDepositClient() def check(self, deposit_check_url: str) -> Mapping[str, str]: status = None try: r = self.client.check(deposit_check_url) - status = 'eventful' if r == 'verified' else 'failed' + status = "eventful" if r == "verified" else "failed" except Exception: - logger.exception("Failure during check on '%s'" % ( - deposit_check_url, )) - status = 'failed' - return {'status': status} + logger.exception("Failure during check on '%s'" % (deposit_check_url,)) + status = "failed" + return {"status": status} diff --git a/swh/deposit/loader/tasks.py b/swh/deposit/loader/tasks.py index 7bc23248..22c257eb 100644 --- a/swh/deposit/loader/tasks.py +++ b/swh/deposit/loader/tasks.py @@ -1,18 +1,18 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from celery import shared_task from swh.deposit.loader.checker import DepositChecker -@shared_task(name=__name__ + '.ChecksDepositTsk') +@shared_task(name=__name__ + ".ChecksDepositTsk") def check_deposit(deposit_check_url): """Check a deposit's status Args: see :func:`DepositChecker.check`. """ checker = DepositChecker() return checker.check(deposit_check_url) diff --git a/swh/deposit/manage.py b/swh/deposit/manage.py index 2be65708..80fbcb86 100755 --- a/swh/deposit/manage.py +++ b/swh/deposit/manage.py @@ -1,53 +1,53 @@ #!/usr/bin/env python3 # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import sys from swh.core import config DEFAULT_CONFIG = { - 'port': ('int', 5006), - 'host': ('str', '127.0.0.1'), + "port": ("int", 5006), + "host": ("str", "127.0.0.1"), } if __name__ == "__main__": - settings_file = 'development' - if sys.argv[1] == 'runserver': + settings_file = "development" + if sys.argv[1] == "runserver": # override the default host:port for the 'runserver' task - conf = config.load_named_config('deposit/server', - default_conf=DEFAULT_CONFIG) - extra_cmd = ['%s:%s' % (conf['host'], conf['port'])] + conf = config.load_named_config("deposit/server", default_conf=DEFAULT_CONFIG) + extra_cmd = ["%s:%s" % (conf["host"], conf["port"])] cmd = sys.argv + extra_cmd - elif sys.argv[1] == 'test': + elif sys.argv[1] == "test": # override the default settings file to read in testing mode - settings_file = 'testing' + settings_file = "testing" cmd = sys.argv else: # otherwise, do nothing cmd = sys.argv - os.environ.setdefault('DJANGO_SETTINGS_MODULE', - 'swh.deposit.settings.%s' % settings_file) + os.environ.setdefault( + "DJANGO_SETTINGS_MODULE", "swh.deposit.settings.%s" % settings_file + ) try: from django.core.management import execute_from_command_line except ImportError: # The above import may fail for some other reason. Ensure that the # issue is really that Django is missing to avoid masking other # exceptions on Python 2. try: import django # noqa except ImportError: raise ImportError( "Couldn't import Django. Are you sure it's installed and " "available on your PYTHONPATH environment variable? Did you " "forget to activate a virtual environment?" ) raise execute_from_command_line(cmd) diff --git a/swh/deposit/migrations/0001_initial.py b/swh/deposit/migrations/0001_initial.py index 1d49e299..bc91890a 100644 --- a/swh/deposit/migrations/0001_initial.py +++ b/swh/deposit/migrations/0001_initial.py @@ -1,138 +1,141 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2017-09-24 10:03 from __future__ import unicode_literals from django.conf import settings import django.contrib.auth.models import django.contrib.postgres.fields import django.contrib.postgres.fields.jsonb from django.db import migrations, models import django.db.models.deletion import django.utils.timezone class Migration(migrations.Migration): initial = True dependencies = [ - ('auth', '0008_alter_user_username_max_length'), + ("auth", "0008_alter_user_username_max_length"), ] operations = [ migrations.CreateModel( - name='Dbversion', + name="Dbversion", fields=[ - ('version', models.IntegerField( - primary_key=True, serialize=False)), - ('release', models.DateTimeField( - default=django.utils.timezone.now, null=True)), - ('description', models.TextField(blank=True, null=True)), + ("version", models.IntegerField(primary_key=True, serialize=False)), + ( + "release", + models.DateTimeField(default=django.utils.timezone.now, null=True), + ), + ("description", models.TextField(blank=True, null=True)), ], - options={ - 'db_table': 'dbversion', - }, + options={"db_table": "dbversion",}, ), migrations.CreateModel( - name='Deposit', + name="Deposit", fields=[ - ('id', models.BigAutoField(primary_key=True, serialize=False)), - ('reception_date', models.DateTimeField(auto_now_add=True)), - ('complete_date', models.DateTimeField(null=True)), - ('external_id', models.TextField()), - ('swh_id', models.TextField(blank=True, null=True)), - ('status', models.TextField( - choices=[('partial', 'partial'), - ('expired', 'expired'), - ('ready', 'ready'), - ('injecting', 'injecting'), - ('success', 'success'), - ('failure', 'failure')], - default='partial')), + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ("reception_date", models.DateTimeField(auto_now_add=True)), + ("complete_date", models.DateTimeField(null=True)), + ("external_id", models.TextField()), + ("swh_id", models.TextField(blank=True, null=True)), + ( + "status", + models.TextField( + choices=[ + ("partial", "partial"), + ("expired", "expired"), + ("ready", "ready"), + ("injecting", "injecting"), + ("success", "success"), + ("failure", "failure"), + ], + default="partial", + ), + ), ], - options={ - 'db_table': 'deposit', - }, + options={"db_table": "deposit",}, ), migrations.CreateModel( - name='DepositClient', + name="DepositClient", fields=[ - ('user_ptr', models.OneToOneField( - auto_created=True, - on_delete=django.db.models.deletion.CASCADE, - parent_link=True, - primary_key=True, - serialize=False, - to=settings.AUTH_USER_MODEL)), - ('collections', - django.contrib.postgres.fields.ArrayField( - base_field=models.IntegerField(), - null=True, - size=None)), - ], - options={ - 'db_table': 'deposit_client', - }, - bases=('auth.user',), - managers=[ - ('objects', django.contrib.auth.models.UserManager()), + ( + "user_ptr", + models.OneToOneField( + auto_created=True, + on_delete=django.db.models.deletion.CASCADE, + parent_link=True, + primary_key=True, + serialize=False, + to=settings.AUTH_USER_MODEL, + ), + ), + ( + "collections", + django.contrib.postgres.fields.ArrayField( + base_field=models.IntegerField(), null=True, size=None + ), + ), ], + options={"db_table": "deposit_client",}, + bases=("auth.user",), + managers=[("objects", django.contrib.auth.models.UserManager()),], ), migrations.CreateModel( - name='DepositCollection', + name="DepositCollection", fields=[ - ('id', models.BigAutoField(primary_key=True, serialize=False)), - ('name', models.TextField()), + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ("name", models.TextField()), ], - options={ - 'db_table': 'deposit_collection', - }, + options={"db_table": "deposit_collection",}, ), migrations.CreateModel( - name='DepositRequest', + name="DepositRequest", fields=[ - ('id', models.BigAutoField(primary_key=True, serialize=False)), - ('date', models.DateTimeField(auto_now_add=True)), - ('metadata', - django.contrib.postgres.fields.jsonb.JSONField(null=True)), - ('deposit', - models.ForeignKey( - on_delete=django.db.models.deletion.DO_NOTHING, - to='deposit.Deposit')), + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ("date", models.DateTimeField(auto_now_add=True)), + ("metadata", django.contrib.postgres.fields.jsonb.JSONField(null=True)), + ( + "deposit", + models.ForeignKey( + on_delete=django.db.models.deletion.DO_NOTHING, + to="deposit.Deposit", + ), + ), ], - options={ - 'db_table': 'deposit_request', - }, + options={"db_table": "deposit_request",}, ), migrations.CreateModel( - name='DepositRequestType', + name="DepositRequestType", fields=[ - ('id', models.BigAutoField(primary_key=True, serialize=False)), - ('name', models.TextField()), + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ("name", models.TextField()), ], - options={ - 'db_table': 'deposit_request_type', - }, + options={"db_table": "deposit_request_type",}, ), migrations.AddField( - model_name='depositrequest', - name='type', + model_name="depositrequest", + name="type", field=models.ForeignKey( on_delete=django.db.models.deletion.DO_NOTHING, - to='deposit.DepositRequestType'), + to="deposit.DepositRequestType", + ), ), migrations.AddField( - model_name='deposit', - name='client', + model_name="deposit", + name="client", field=models.ForeignKey( on_delete=django.db.models.deletion.DO_NOTHING, - to='deposit.DepositClient'), + to="deposit.DepositClient", + ), ), migrations.AddField( - model_name='deposit', - name='collection', + model_name="deposit", + name="collection", field=models.ForeignKey( on_delete=django.db.models.deletion.DO_NOTHING, - to='deposit.DepositCollection'), + to="deposit.DepositCollection", + ), ), ] diff --git a/swh/deposit/migrations/0002_depositrequest_archive.py b/swh/deposit/migrations/0002_depositrequest_archive.py index ad78ba4f..68e0b080 100644 --- a/swh/deposit/migrations/0002_depositrequest_archive.py +++ b/swh/deposit/migrations/0002_depositrequest_archive.py @@ -1,23 +1,23 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2017-10-05 10:36 from __future__ import unicode_literals from django.db import migrations, models import swh.deposit.models class Migration(migrations.Migration): dependencies = [ - ('deposit', '0001_initial'), + ("deposit", "0001_initial"), ] operations = [ migrations.AddField( - model_name='depositrequest', - name='archive', + model_name="depositrequest", + name="archive", field=models.FileField( - null=True, - upload_to=swh.deposit.models.client_directory_path), + null=True, upload_to=swh.deposit.models.client_directory_path + ), ), ] diff --git a/swh/deposit/migrations/0003_temporaryarchive.py b/swh/deposit/migrations/0003_temporaryarchive.py index 135a9718..737fb2b6 100644 --- a/swh/deposit/migrations/0003_temporaryarchive.py +++ b/swh/deposit/migrations/0003_temporaryarchive.py @@ -1,26 +1,24 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2017-10-06 13:06 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('deposit', '0002_depositrequest_archive'), + ("deposit", "0002_depositrequest_archive"), ] operations = [ migrations.CreateModel( - name='TemporaryArchive', + name="TemporaryArchive", fields=[ - ('id', models.BigAutoField(primary_key=True, serialize=False)), - ('path', models.TextField()), - ('date', models.DateTimeField(auto_now_add=True)), + ("id", models.BigAutoField(primary_key=True, serialize=False)), + ("path", models.TextField()), + ("date", models.DateTimeField(auto_now_add=True)), ], - options={ - 'db_table': 'deposit_temporary_archive', - }, + options={"db_table": "deposit_temporary_archive",}, ), ] diff --git a/swh/deposit/migrations/0004_delete_temporaryarchive.py b/swh/deposit/migrations/0004_delete_temporaryarchive.py index 6fa9d84b..8c995aea 100644 --- a/swh/deposit/migrations/0004_delete_temporaryarchive.py +++ b/swh/deposit/migrations/0004_delete_temporaryarchive.py @@ -1,18 +1,16 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2017-10-18 09:03 from __future__ import unicode_literals from django.db import migrations class Migration(migrations.Migration): dependencies = [ - ('deposit', '0003_temporaryarchive'), + ("deposit", "0003_temporaryarchive"), ] operations = [ - migrations.DeleteModel( - name='TemporaryArchive', - ), + migrations.DeleteModel(name="TemporaryArchive",), ] diff --git a/swh/deposit/migrations/0005_auto_20171019_1436.py b/swh/deposit/migrations/0005_auto_20171019_1436.py index 75e9901a..3c6b8fe8 100644 --- a/swh/deposit/migrations/0005_auto_20171019_1436.py +++ b/swh/deposit/migrations/0005_auto_20171019_1436.py @@ -1,29 +1,32 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2017-10-19 14:36 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('deposit', '0004_delete_temporaryarchive'), + ("deposit", "0004_delete_temporaryarchive"), ] operations = [ migrations.AlterField( - model_name='deposit', - name='status', + model_name="deposit", + name="status", field=models.TextField( - choices=[('partial', 'partial'), - ('expired', 'expired'), - ('ready-for-checks', 'ready-for-checks'), - ('ready', 'ready'), - ('rejected', 'rejected'), - ('injecting', 'injecting'), - ('success', 'success'), - ('failure', 'failure')], - default='partial'), + choices=[ + ("partial", "partial"), + ("expired", "expired"), + ("ready-for-checks", "ready-for-checks"), + ("ready", "ready"), + ("rejected", "rejected"), + ("injecting", "injecting"), + ("success", "success"), + ("failure", "failure"), + ], + default="partial", + ), ), ] diff --git a/swh/deposit/migrations/0006_depositclient_url.py b/swh/deposit/migrations/0006_depositclient_url.py index dedb4155..b6baf889 100644 --- a/swh/deposit/migrations/0006_depositclient_url.py +++ b/swh/deposit/migrations/0006_depositclient_url.py @@ -1,22 +1,21 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2017-11-07 13:12 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('deposit', '0005_auto_20171019_1436'), + ("deposit", "0005_auto_20171019_1436"), ] operations = [ migrations.AddField( - model_name='depositclient', - name='url', - field=models.TextField( - default='https://hal.archives-ouvertes.fr/'), + model_name="depositclient", + name="url", + field=models.TextField(default="https://hal.archives-ouvertes.fr/"), preserve_default=False, ), ] diff --git a/swh/deposit/migrations/0007_auto_20171129_1609.py b/swh/deposit/migrations/0007_auto_20171129_1609.py index 371197bb..ee2f158a 100644 --- a/swh/deposit/migrations/0007_auto_20171129_1609.py +++ b/swh/deposit/migrations/0007_auto_20171129_1609.py @@ -1,20 +1,18 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2017-11-29 16:09 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('deposit', '0006_depositclient_url'), + ("deposit", "0006_depositclient_url"), ] operations = [ migrations.AlterField( - model_name='depositclient', - name='url', - field=models.TextField(null=False), + model_name="depositclient", name="url", field=models.TextField(null=False), ), ] diff --git a/swh/deposit/migrations/0008_auto_20171130_1513.py b/swh/deposit/migrations/0008_auto_20171130_1513.py index e8da74c2..0ccea160 100644 --- a/swh/deposit/migrations/0008_auto_20171130_1513.py +++ b/swh/deposit/migrations/0008_auto_20171130_1513.py @@ -1,29 +1,32 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2017-11-30 15:13 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('deposit', '0007_auto_20171129_1609'), + ("deposit", "0007_auto_20171129_1609"), ] operations = [ migrations.AlterField( - model_name='deposit', - name='status', + model_name="deposit", + name="status", field=models.TextField( - choices=[('partial', 'partial'), - ('expired', 'expired'), - ('ready-for-checks', 'ready-for-checks'), - ('ready-for-load', 'ready-for-load'), - ('rejected', 'rejected'), - ('loading', 'loading'), - ('success', 'success'), - ('failure', 'failure')], - default='partial'), + choices=[ + ("partial", "partial"), + ("expired", "expired"), + ("ready-for-checks", "ready-for-checks"), + ("ready-for-load", "ready-for-load"), + ("rejected", "rejected"), + ("loading", "loading"), + ("success", "success"), + ("failure", "failure"), + ], + default="partial", + ), ), ] diff --git a/swh/deposit/migrations/0009_deposit_parent.py b/swh/deposit/migrations/0009_deposit_parent.py index e2fb56a1..6683cfc7 100644 --- a/swh/deposit/migrations/0009_deposit_parent.py +++ b/swh/deposit/migrations/0009_deposit_parent.py @@ -1,24 +1,25 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2017-12-04 12:49 from __future__ import unicode_literals from django.db import migrations, models import django.db.models.deletion class Migration(migrations.Migration): dependencies = [ - ('deposit', '0008_auto_20171130_1513'), + ("deposit", "0008_auto_20171130_1513"), ] operations = [ migrations.AddField( - model_name='deposit', - name='parent', + model_name="deposit", + name="parent", field=models.ForeignKey( null=True, on_delete=django.db.models.deletion.CASCADE, - to='deposit.Deposit'), + to="deposit.Deposit", + ), ), ] diff --git a/swh/deposit/migrations/0010_auto_20180110_0953.py b/swh/deposit/migrations/0010_auto_20180110_0953.py index 799771ad..469208ed 100644 --- a/swh/deposit/migrations/0010_auto_20180110_0953.py +++ b/swh/deposit/migrations/0010_auto_20180110_0953.py @@ -1,26 +1,24 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2018-01-10 09:53 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('deposit', '0009_deposit_parent'), + ("deposit", "0009_deposit_parent"), ] operations = [ migrations.RenameField( - model_name='depositclient', - old_name='url', - new_name='provider_url', + model_name="depositclient", old_name="url", new_name="provider_url", ), migrations.AddField( - model_name='depositclient', - name='domain', - field=models.TextField(default=''), + model_name="depositclient", + name="domain", + field=models.TextField(default=""), preserve_default=False, ), ] diff --git a/swh/deposit/migrations/0011_auto_20180115_1510.py b/swh/deposit/migrations/0011_auto_20180115_1510.py index 3c1645cb..4d929f0b 100644 --- a/swh/deposit/migrations/0011_auto_20180115_1510.py +++ b/swh/deposit/migrations/0011_auto_20180115_1510.py @@ -1,29 +1,32 @@ # -*- coding: utf-8 -*- # Generated by Django 1.10.7 on 2018-01-15 15:10 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('deposit', '0010_auto_20180110_0953'), + ("deposit", "0010_auto_20180110_0953"), ] operations = [ migrations.AlterField( - model_name='deposit', - name='status', + model_name="deposit", + name="status", field=models.TextField( - choices=[('partial', 'partial'), - ('expired', 'expired'), - ('deposited', 'deposited'), - ('verified', 'verified'), - ('rejected', 'rejected'), - ('loading', 'loading'), - ('done', 'done'), - ('failed', 'failed')], - default='partial'), + choices=[ + ("partial", "partial"), + ("expired", "expired"), + ("deposited", "deposited"), + ("verified", "verified"), + ("rejected", "rejected"), + ("loading", "loading"), + ("done", "done"), + ("failed", "failed"), + ], + default="partial", + ), ), ] diff --git a/swh/deposit/migrations/0012_deposit_status_detail.py b/swh/deposit/migrations/0012_deposit_status_detail.py index b6825072..d301e304 100644 --- a/swh/deposit/migrations/0012_deposit_status_detail.py +++ b/swh/deposit/migrations/0012_deposit_status_detail.py @@ -1,21 +1,21 @@ # -*- coding: utf-8 -*- # Generated by Django 1.11.14 on 2018-07-09 13:08 from __future__ import unicode_literals import django.contrib.postgres.fields.jsonb from django.db import migrations class Migration(migrations.Migration): dependencies = [ - ('deposit', '0011_auto_20180115_1510'), + ("deposit", "0011_auto_20180115_1510"), ] operations = [ migrations.AddField( - model_name='deposit', - name='status_detail', + model_name="deposit", + name="status_detail", field=django.contrib.postgres.fields.jsonb.JSONField(null=True), ), ] diff --git a/swh/deposit/migrations/0013_depositrequest_raw_metadata.py b/swh/deposit/migrations/0013_depositrequest_raw_metadata.py index 4d0f0ea9..58055b37 100644 --- a/swh/deposit/migrations/0013_depositrequest_raw_metadata.py +++ b/swh/deposit/migrations/0013_depositrequest_raw_metadata.py @@ -1,20 +1,20 @@ # -*- coding: utf-8 -*- # Generated by Django 1.11.14 on 2018-07-19 13:07 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('deposit', '0012_deposit_status_detail'), + ("deposit", "0012_deposit_status_detail"), ] operations = [ migrations.AddField( - model_name='depositrequest', - name='raw_metadata', + model_name="depositrequest", + name="raw_metadata", field=models.TextField(null=True), ), ] diff --git a/swh/deposit/migrations/0014_auto_20180720_1221.py b/swh/deposit/migrations/0014_auto_20180720_1221.py index 50d4d9a8..50ff8ba9 100644 --- a/swh/deposit/migrations/0014_auto_20180720_1221.py +++ b/swh/deposit/migrations/0014_auto_20180720_1221.py @@ -1,30 +1,30 @@ # -*- coding: utf-8 -*- # Generated by Django 1.11.14 on 2018-07-20 12:21 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('deposit', '0013_depositrequest_raw_metadata'), + ("deposit", "0013_depositrequest_raw_metadata"), ] operations = [ migrations.AddField( - model_name='deposit', - name='swh_anchor_id', + model_name="deposit", + name="swh_anchor_id", field=models.TextField(blank=True, null=True), ), migrations.AddField( - model_name='deposit', - name='swh_anchor_id_context', + model_name="deposit", + name="swh_anchor_id_context", field=models.TextField(blank=True, null=True), ), migrations.AddField( - model_name='deposit', - name='swh_id_context', + model_name="deposit", + name="swh_id_context", field=models.TextField(blank=True, null=True), ), ] diff --git a/swh/deposit/migrations/0015_depositrequest_typemigration.py b/swh/deposit/migrations/0015_depositrequest_typemigration.py index 61942737..a9974287 100644 --- a/swh/deposit/migrations/0015_depositrequest_typemigration.py +++ b/swh/deposit/migrations/0015_depositrequest_typemigration.py @@ -1,46 +1,40 @@ # -*- coding: utf-8 -*- # Generated by Django 1.11.18 on 2019-04-12 16:40 from __future__ import unicode_literals from django.db import migrations, models def populate_deposit_type2(apps, schema_editor): # We can't import the DepositRequest model directly as it may be a newer # version than this migration expects. We use the historical version. - DepositRequest = apps.get_model('deposit', 'DepositRequest') + DepositRequest = apps.get_model("deposit", "DepositRequest") for deposit in DepositRequest.objects.all(): deposit.type2 = deposit.type.name deposit.save() class Migration(migrations.Migration): dependencies = [ - ('deposit', '0014_auto_20180720_1221'), + ("deposit", "0014_auto_20180720_1221"), ] operations = [ migrations.AddField( - model_name='depositrequest', - name='type2', + model_name="depositrequest", + name="type2", field=models.CharField( - choices=[('archive', 'archive'), - ('metadata', 'metadata')], - max_length=8, null=True), + choices=[("archive", "archive"), ("metadata", "metadata")], + max_length=8, + null=True, + ), ), migrations.RunPython(populate_deposit_type2), - migrations.RemoveField( - model_name='depositrequest', - name='type', - ), + migrations.RemoveField(model_name="depositrequest", name="type",), migrations.RenameField( - model_name='depositrequest', - old_name='type2', - new_name='type', - ), - migrations.DeleteModel( - name='DepositRequestType', + model_name="depositrequest", old_name="type2", new_name="type", ), + migrations.DeleteModel(name="DepositRequestType",), ] diff --git a/swh/deposit/migrations/0016_auto_20190507_1408.py b/swh/deposit/migrations/0016_auto_20190507_1408.py index 15c578ef..1d4aa257 100644 --- a/swh/deposit/migrations/0016_auto_20190507_1408.py +++ b/swh/deposit/migrations/0016_auto_20190507_1408.py @@ -1,31 +1,33 @@ # -*- coding: utf-8 -*- # Generated by Django 1.11.18 on 2019-05-07 14:08 from __future__ import unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ - ('deposit', '0015_depositrequest_typemigration'), + ("deposit", "0015_depositrequest_typemigration"), ] operations = [ migrations.AddField( - model_name='deposit', - name='check_task_id', + model_name="deposit", + name="check_task_id", field=models.TextField( blank=True, null=True, - verbose_name="Scheduler's associated checking task id"), + verbose_name="Scheduler's associated checking task id", + ), ), migrations.AddField( - model_name='deposit', - name='load_task_id', + model_name="deposit", + name="load_task_id", field=models.TextField( blank=True, null=True, - verbose_name="Scheduler's associated loading task id"), + verbose_name="Scheduler's associated loading task id", + ), ), ] diff --git a/swh/deposit/migrations/0017_auto_20190925_0906.py b/swh/deposit/migrations/0017_auto_20190925_0906.py index 739cbc78..3aa8305c 100644 --- a/swh/deposit/migrations/0017_auto_20190925_0906.py +++ b/swh/deposit/migrations/0017_auto_20190925_0906.py @@ -1,24 +1,25 @@ # -*- coding: utf-8 -*- # Generated by Django 1.11.23 on 2019-09-25 09:06 from __future__ import unicode_literals from django.db import migrations, models import django.db.models.deletion class Migration(migrations.Migration): dependencies = [ - ('deposit', '0016_auto_20190507_1408'), + ("deposit", "0016_auto_20190507_1408"), ] operations = [ migrations.AlterField( - model_name='deposit', - name='parent', + model_name="deposit", + name="parent", field=models.ForeignKey( null=True, on_delete=django.db.models.deletion.PROTECT, - to='deposit.Deposit'), + to="deposit.Deposit", + ), ), ] diff --git a/swh/deposit/models.py b/swh/deposit/models.py index 1a7a78ea..4d986367 100644 --- a/swh/deposit/models.py +++ b/swh/deposit/models.py @@ -1,228 +1,235 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # Generated from: # cd swh_deposit && \ # python3 -m manage inspectdb from django.contrib.postgres.fields import JSONField, ArrayField from django.contrib.auth.models import User, UserManager from django.db import models from django.utils.timezone import now from .config import ( - DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, - DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE, - DEPOSIT_STATUS_REJECTED, ARCHIVE_TYPE, METADATA_TYPE + DEPOSIT_STATUS_VERIFIED, + DEPOSIT_STATUS_DEPOSITED, + DEPOSIT_STATUS_PARTIAL, + DEPOSIT_STATUS_LOAD_SUCCESS, + DEPOSIT_STATUS_LOAD_FAILURE, + DEPOSIT_STATUS_REJECTED, + ARCHIVE_TYPE, + METADATA_TYPE, ) class Dbversion(models.Model): """Db version """ + version = models.IntegerField(primary_key=True) release = models.DateTimeField(default=now, null=True) description = models.TextField(blank=True, null=True) class Meta: - db_table = 'dbversion' + db_table = "dbversion" def __str__(self): - return str({ - 'version': self.version, - 'release': self.release, - 'description': self.description - }) + return str( + { + "version": self.version, + "release": self.release, + "description": self.description, + } + ) """Possible status""" DEPOSIT_STATUS = [ (DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_PARTIAL), - ('expired', 'expired'), + ("expired", "expired"), (DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_DEPOSITED), (DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_VERIFIED), (DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_REJECTED), - ('loading', 'loading'), + ("loading", "loading"), (DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_SUCCESS), (DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_LOAD_FAILURE), ] """Possible status and the detailed meaning.""" DEPOSIT_STATUS_DETAIL = { - DEPOSIT_STATUS_PARTIAL: 'Deposit is partially received. To finalize it, ' - 'In-Progress header should be false', - 'expired': 'Deposit has been there too long and is now ' - 'deemed ready to be garbage collected', - DEPOSIT_STATUS_DEPOSITED: 'Deposit is ready for additional checks ' - '(tarball ok, metadata, etc...)', - DEPOSIT_STATUS_VERIFIED: 'Deposit is fully received, checked, and ' - 'ready for loading', - DEPOSIT_STATUS_REJECTED: 'Deposit failed the checks', - 'loading': "Loading is ongoing on swh's side", - DEPOSIT_STATUS_LOAD_SUCCESS: 'The deposit has been successfully ' - 'loaded into the Software Heritage archive', - DEPOSIT_STATUS_LOAD_FAILURE: 'The deposit loading into the ' - 'Software Heritage archive failed', + DEPOSIT_STATUS_PARTIAL: "Deposit is partially received. To finalize it, " + "In-Progress header should be false", + "expired": "Deposit has been there too long and is now " + "deemed ready to be garbage collected", + DEPOSIT_STATUS_DEPOSITED: "Deposit is ready for additional checks " + "(tarball ok, metadata, etc...)", + DEPOSIT_STATUS_VERIFIED: "Deposit is fully received, checked, and " + "ready for loading", + DEPOSIT_STATUS_REJECTED: "Deposit failed the checks", + "loading": "Loading is ongoing on swh's side", + DEPOSIT_STATUS_LOAD_SUCCESS: "The deposit has been successfully " + "loaded into the Software Heritage archive", + DEPOSIT_STATUS_LOAD_FAILURE: "The deposit loading into the " + "Software Heritage archive failed", } class DepositClient(User): """Deposit client """ + collections = ArrayField(models.IntegerField(), null=True) objects = UserManager() # type: ignore # this typing hint is due to a mypy/django-stubs limitation, # see https://github.com/typeddjango/django-stubs/issues/174 provider_url = models.TextField(null=False) domain = models.TextField(null=False) class Meta: - db_table = 'deposit_client' + db_table = "deposit_client" def __str__(self): - return str({ - 'id': self.id, - 'collections': self.collections, - 'username': super().username, - 'domain': self.domain, - 'provider_url': self.provider_url, - }) + return str( + { + "id": self.id, + "collections": self.collections, + "username": super().username, + "domain": self.domain, + "provider_url": self.provider_url, + } + ) class Deposit(models.Model): """Deposit reception table """ + id = models.BigAutoField(primary_key=True) # First deposit reception date reception_date = models.DateTimeField(auto_now_add=True) # Date when the deposit is deemed complete and ready for loading complete_date = models.DateTimeField(null=True) # collection concerned by the deposit - collection = models.ForeignKey( - 'DepositCollection', models.DO_NOTHING) + collection = models.ForeignKey("DepositCollection", models.DO_NOTHING) # Deposit's external identifier external_id = models.TextField() # Deposit client - client = models.ForeignKey('DepositClient', models.DO_NOTHING) + client = models.ForeignKey("DepositClient", models.DO_NOTHING) # SWH's loading result identifier swh_id = models.TextField(blank=True, null=True) swh_id_context = models.TextField(blank=True, null=True) swh_anchor_id = models.TextField(blank=True, null=True) swh_anchor_id_context = models.TextField(blank=True, null=True) # Deposit's status regarding loading - status = models.TextField( - choices=DEPOSIT_STATUS, - default=DEPOSIT_STATUS_PARTIAL) + status = models.TextField(choices=DEPOSIT_STATUS, default=DEPOSIT_STATUS_PARTIAL) status_detail = JSONField(null=True) # deposit can have one parent - parent = models.ForeignKey('self', on_delete=models.PROTECT, null=True) + parent = models.ForeignKey("self", on_delete=models.PROTECT, null=True) check_task_id = models.TextField( - blank=True, null=True, - verbose_name="Scheduler's associated checking task id" + blank=True, null=True, verbose_name="Scheduler's associated checking task id" ) load_task_id = models.TextField( - blank=True, null=True, - verbose_name="Scheduler's associated loading task id" + blank=True, null=True, verbose_name="Scheduler's associated loading task id" ) class Meta: - db_table = 'deposit' + db_table = "deposit" def __str__(self): d = { - 'id': self.id, - 'reception_date': self.reception_date, - 'collection': self.collection.name, - 'external_id': self.external_id, - 'client': self.client.username, - 'status': self.status, + "id": self.id, + "reception_date": self.reception_date, + "collection": self.collection.name, + "external_id": self.external_id, + "client": self.client.username, + "status": self.status, } if self.status in (DEPOSIT_STATUS_REJECTED): - d['status_detail'] = self.status_detail + d["status_detail"] = self.status_detail return str(d) @property def origin_url(self): - return '%s/%s' % (self.client.provider_url.rstrip('/'), - self.external_id) + return "%s/%s" % (self.client.provider_url.rstrip("/"), self.external_id) def client_directory_path(instance, filename): """Callable to upload archive in MEDIA_ROOT/user_/ Args: instance (DepositRequest): DepositRequest concerned by the upload filename (str): Filename of the uploaded file Returns: A path to be prefixed by the MEDIA_ROOT to access physically to the file uploaded. """ - return 'client_{0}/{1}'.format(instance.deposit.client.id, filename) + return "client_{0}/{1}".format(instance.deposit.client.id, filename) -REQUEST_TYPES = [(ARCHIVE_TYPE, ARCHIVE_TYPE), - (METADATA_TYPE, METADATA_TYPE)] +REQUEST_TYPES = [(ARCHIVE_TYPE, ARCHIVE_TYPE), (METADATA_TYPE, METADATA_TYPE)] class DepositRequest(models.Model): """Deposit request associated to one deposit. """ + id = models.BigAutoField(primary_key=True) # Deposit concerned by the request deposit = models.ForeignKey(Deposit, models.DO_NOTHING) date = models.DateTimeField(auto_now_add=True) # Deposit request information on the data to inject # this can be null when type is 'archive' metadata = JSONField(null=True) raw_metadata = models.TextField(null=True) # this can be null when type is 'metadata' archive = models.FileField(null=True, upload_to=client_directory_path) - type = models.CharField(max_length=8, - choices=REQUEST_TYPES, - null=True) + type = models.CharField(max_length=8, choices=REQUEST_TYPES, null=True) class Meta: - db_table = 'deposit_request' + db_table = "deposit_request" def __str__(self): meta = None if self.metadata: from json import dumps + meta = dumps(self.metadata) archive_name = None if self.archive: archive_name = self.archive.name - return str({ - 'id': self.id, - 'deposit': self.deposit, - 'metadata': meta, - 'archive': archive_name - }) + return str( + { + "id": self.id, + "deposit": self.deposit, + "metadata": meta, + "archive": archive_name, + } + ) class DepositCollection(models.Model): id = models.BigAutoField(primary_key=True) # Human readable name for the collection type e.g HAL, arXiv, etc... name = models.TextField() class Meta: - db_table = 'deposit_collection' + db_table = "deposit_collection" def __str__(self): - return str({'id': self.id, 'name': self.name}) + return str({"id": self.id, "name": self.name}) diff --git a/swh/deposit/parsers.py b/swh/deposit/parsers.py index 70f328fd..0cb49065 100644 --- a/swh/deposit/parsers.py +++ b/swh/deposit/parsers.py @@ -1,92 +1,96 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of defining parsers with SWORD 2.0 supported mediatypes. """ import xmltodict from django.conf import settings from rest_framework.parsers import BaseParser from rest_framework.parsers import FileUploadParser from rest_framework.parsers import MultiPartParser from xml.parsers.expat import ExpatError from swh.deposit.errors import ParserError class SWHFileUploadZipParser(FileUploadParser): """File upload parser limited to zip archive. """ - media_type = 'application/zip' + + media_type = "application/zip" class SWHFileUploadTarParser(FileUploadParser): """File upload parser limited to tarball (tar, tar.gz, tar.*) archives. """ - media_type = 'application/x-tar' + + media_type = "application/x-tar" class SWHXMLParser(BaseParser): """ XML parser. """ - media_type = 'application/xml' + + media_type = "application/xml" def parse(self, stream, media_type=None, parser_context=None): """ Parses the incoming bytestream as XML and returns the resulting data. """ parser_context = parser_context or {} - encoding = parser_context.get('encoding', settings.DEFAULT_CHARSET) - data = xmltodict.parse(stream, encoding=encoding, - process_namespaces=False) - if 'entry' in data: - data = data['entry'] + encoding = parser_context.get("encoding", settings.DEFAULT_CHARSET) + data = xmltodict.parse(stream, encoding=encoding, process_namespaces=False) + if "entry" in data: + data = data["entry"] return data class SWHAtomEntryParser(SWHXMLParser): """Atom entry parser limited to specific mediatype """ - media_type = 'application/atom+xml;type=entry' + + media_type = "application/atom+xml;type=entry" def parse(self, stream, media_type=None, parser_context=None): # We do not actually want to parse the stream yet # because we want to keep the raw data as well # this is done later in the atom entry call # (cf. swh.deposit.api.common.SWHBaseDeposit._atom_entry) return stream class SWHMultiPartParser(MultiPartParser): """Multipart parser limited to a subset of mediatypes. """ - media_type = 'multipart/*; *' + + media_type = "multipart/*; *" def parse_xml(raw_content): """Parse xml body. Args: raw_content (bytes): The content to parse Raises: ParserError in case of a malformed xml Returns: content parsed as dict. """ try: return SWHXMLParser().parse(raw_content) except ExpatError as e: raise ParserError(str(e)) diff --git a/swh/deposit/settings/common.py b/swh/deposit/settings/common.py index 4df0c3be..7d0f212c 100644 --- a/swh/deposit/settings/common.py +++ b/swh/deposit/settings/common.py @@ -1,119 +1,117 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ Django settings for swh project. Generated by 'django-admin startproject' using Django 1.10.7. For more information on this file, see https://docs.djangoproject.com/en/1.10/topics/settings/ For the full list of settings and their values, see https://docs.djangoproject.com/en/1.10/ref/settings/ """ import os # Build paths inside the project like this: os.path.join(BASE_DIR, ...) BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/ -ALLOWED_HOSTS = ['127.0.0.1', 'localhost'] +ALLOWED_HOSTS = ["127.0.0.1", "localhost"] # Application definition INSTALLED_APPS = [ - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.staticfiles', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.postgres', # for JSONField, ArrayField - 'swh.deposit.apps.DepositConfig', + "django.contrib.auth", + "django.contrib.contenttypes", + "django.contrib.staticfiles", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.postgres", # for JSONField, ArrayField + "swh.deposit.apps.DepositConfig", ] MIDDLEWARE = [ - 'django.middleware.security.SecurityMiddleware', - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', - 'swh.deposit.auth.WrapBasicAuthenticationResponseMiddleware', + "django.middleware.security.SecurityMiddleware", + "django.contrib.sessions.middleware.SessionMiddleware", + "django.middleware.common.CommonMiddleware", + "django.middleware.csrf.CsrfViewMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", + "django.contrib.messages.middleware.MessageMiddleware", + "django.middleware.clickjacking.XFrameOptionsMiddleware", + "swh.deposit.auth.WrapBasicAuthenticationResponseMiddleware", ] -ROOT_URLCONF = 'swh.deposit.urls' +ROOT_URLCONF = "swh.deposit.urls" TEMPLATES = [ { - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'DIRS': [], - 'APP_DIRS': True, - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.debug', - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', + "BACKEND": "django.template.backends.django.DjangoTemplates", + "DIRS": [], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", ], }, }, ] # Password validation # https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators AUTH_PASSWORD_VALIDATORS = [ { - 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', # noqa + "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", # noqa }, + {"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",}, # noqa { - 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', # noqa + "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", # noqa }, { - 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', # noqa - }, - { - 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', # noqa + "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", # noqa }, ] # Internationalization # https://docs.djangoproject.com/en/1.10/topics/i18n/ -LANGUAGE_CODE = 'en-us' +LANGUAGE_CODE = "en-us" -TIME_ZONE = 'UTC' +TIME_ZONE = "UTC" USE_I18N = True USE_L10N = True USE_TZ = True # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/1.10/howto/static-files/ -STATIC_URL = '/static/' +STATIC_URL = "/static/" REST_FRAMEWORK = { - 'DEFAULT_AUTHENTICATION_CLASSES': ( - 'rest_framework.authentication.BasicAuthentication', + "DEFAULT_AUTHENTICATION_CLASSES": ( + "rest_framework.authentication.BasicAuthentication", ), } FILE_UPLOAD_HANDLERS = [ "django.core.files.uploadhandler.MemoryFileUploadHandler", "django.core.files.uploadhandler.TemporaryFileUploadHandler", ] diff --git a/swh/deposit/settings/development.py b/swh/deposit/settings/development.py index 4575a26c..86670558 100644 --- a/swh/deposit/settings/development.py +++ b/swh/deposit/settings/development.py @@ -1,59 +1,52 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from .common import * # noqa # SECURITY WARNING: don't run with debug turned on in production! DEBUG = True # SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = 'development-key' +SECRET_KEY = "development-key" # https://docs.djangoproject.com/en/1.10/ref/settings/#logging LOGGING = { - 'version': 1, - 'disable_existing_loggers': False, - 'formatters': { - 'standard': { - 'format': "[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s", # noqa - 'datefmt': "%d/%b/%Y %H:%M:%S" + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "standard": { + "format": "[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s", # noqa + "datefmt": "%d/%b/%Y %H:%M:%S", }, }, - 'handlers': { - 'console': { - 'level': 'DEBUG', - 'class': 'logging.StreamHandler', - 'formatter': 'standard' + "handlers": { + "console": { + "level": "DEBUG", + "class": "logging.StreamHandler", + "formatter": "standard", }, }, - 'loggers': { - 'django': { - 'handlers': ['console'], - 'level': 'DEBUG', - 'propagate': True, + "loggers": { + "django": {"handlers": ["console"], "level": "DEBUG", "propagate": True,}, + "django.db.backends": { + "handlers": ["console"], + "level": "INFO", + "propagate": False, }, - 'django.db.backends': { - 'handlers': ['console'], - 'level': 'INFO', - 'propagate': False, - }, - 'swh.deposit': { - 'handlers': ['console'], - 'level': 'DEBUG', - }, - } + "swh.deposit": {"handlers": ["console"], "level": "DEBUG",}, + }, } # https://docs.djangoproject.com/en/1.10/ref/settings/#databases DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.postgresql', - 'NAME': 'swh-deposit-dev', # this is no longer used in test env + "default": { + "ENGINE": "django.db.backends.postgresql", + "NAME": "swh-deposit-dev", # this is no longer used in test env } } # https://docs.djangoproject.com/en/1.11/ref/settings/#std:setting-MEDIA_ROOT # SECURITY WARNING: Override this in the production.py module -MEDIA_ROOT = '/tmp/swh-deposit/uploads/' +MEDIA_ROOT = "/tmp/swh-deposit/uploads/" diff --git a/swh/deposit/settings/production.py b/swh/deposit/settings/production.py index dea38a8d..725536b0 100644 --- a/swh/deposit/settings/production.py +++ b/swh/deposit/settings/production.py @@ -1,113 +1,110 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from .common import * # noqa from .common import ALLOWED_HOSTS from swh.core import config -ALLOWED_HOSTS += ['deposit.softwareheritage.org'] +ALLOWED_HOSTS += ["deposit.softwareheritage.org"] # Setup support for proxy headers USE_X_FORWARDED_HOST = True -SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https') +SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https") DEBUG = False # Database # https://docs.djangoproject.com/en/1.10/ref/settings/#databases # https://docs.djangoproject.com/en/1.10/ref/settings/#std:setting-DATABASES # https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/#databases # Retrieve the deposit's configuration file # and check the required setup is ok # If not raise an error explaining the errors -config_file = os.environ.get('SWH_CONFIG_FILENAME') +config_file = os.environ.get("SWH_CONFIG_FILENAME") if not config_file: - raise ValueError('Production: SWH_CONFIG_FILENANE must be set to the' - ' configuration file needed!') + raise ValueError( + "Production: SWH_CONFIG_FILENANE must be set to the" + " configuration file needed!" + ) if not os.path.exists(config_file): - raise ValueError('Production: configuration file %s does not exist!' % ( - config_file, )) + raise ValueError( + "Production: configuration file %s does not exist!" % (config_file,) + ) conf = config.load_named_config(config_file) if not conf: - raise ValueError( - 'Production: configuration %s does not exist.' % ( - config_file, )) + raise ValueError("Production: configuration %s does not exist." % (config_file,)) -for key in ('scheduler', 'private'): +for key in ("scheduler", "private"): if not conf.get(key): raise ValueError( - "Production: invalid configuration; missing %s config entry." % ( - key, )) + "Production: invalid configuration; missing %s config entry." % (key,) + ) -ALLOWED_HOSTS += conf.get('allowed_hosts', []) +ALLOWED_HOSTS += conf.get("allowed_hosts", []) -private_conf = conf['private'] -SECRET_KEY = private_conf['secret_key'] +private_conf = conf["private"] +SECRET_KEY = private_conf["secret_key"] # https://docs.djangoproject.com/en/1.10/ref/settings/#logging LOGGING = { - 'version': 1, - 'disable_existing_loggers': False, - 'formatters': { - 'standard': { - 'format': "[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s", # noqa - 'datefmt': "%d/%b/%Y %H:%M:%S" + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "standard": { + "format": "[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s", # noqa + "datefmt": "%d/%b/%Y %H:%M:%S", }, }, - 'handlers': { - 'console': { - 'level': 'INFO', - 'class': 'logging.StreamHandler', - 'formatter': 'standard' + "handlers": { + "console": { + "level": "INFO", + "class": "logging.StreamHandler", + "formatter": "standard", }, }, - 'loggers': { - 'django': { - 'handlers': ['console'], - 'level': 'INFO', - 'propagate': True, - }, + "loggers": { + "django": {"handlers": ["console"], "level": "INFO", "propagate": True,}, }, } # database -db_conf = private_conf.get('db', {'name': 'unset'}) +db_conf = private_conf.get("db", {"name": "unset"}) db = { - 'ENGINE': 'django.db.backends.postgresql', - 'NAME': db_conf['name'], + "ENGINE": "django.db.backends.postgresql", + "NAME": db_conf["name"], } -db_user = db_conf.get('user') +db_user = db_conf.get("user") if db_user: - db['USER'] = db_user + db["USER"] = db_user -db_pass = db_conf.get('password') +db_pass = db_conf.get("password") if db_pass: - db['PASSWORD'] = db_pass + db["PASSWORD"] = db_pass -db_host = db_conf.get('host') +db_host = db_conf.get("host") if db_host: - db['HOST'] = db_host + db["HOST"] = db_host -db_port = db_conf.get('port') +db_port = db_conf.get("port") if db_port: - db['PORT'] = db_port + db["PORT"] = db_port # https://docs.djangoproject.com/en/1.10/ref/settings/#databases DATABASES = { - 'default': db, + "default": db, } # Upload user directory # https://docs.djangoproject.com/en/1.11/ref/settings/#std:setting-MEDIA_ROOT -MEDIA_ROOT = private_conf.get('media_root') +MEDIA_ROOT = private_conf.get("media_root") diff --git a/swh/deposit/settings/testing.py b/swh/deposit/settings/testing.py index c35631b3..bc1ee990 100644 --- a/swh/deposit/settings/testing.py +++ b/swh/deposit/settings/testing.py @@ -1,47 +1,42 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from .common import * # noqa from .common import ALLOWED_HOSTS from .development import * # noqa from .development import INSTALLED_APPS # django setup -ALLOWED_HOSTS += ['testserver'] +ALLOWED_HOSTS += ["testserver"] -INSTALLED_APPS += ['pytest_django'] +INSTALLED_APPS += ["pytest_django"] # https://docs.djangoproject.com/en/1.10/ref/settings/#logging LOGGING = { - 'version': 1, - 'disable_existing_loggers': True, - 'formatters': { - 'standard': { - 'format': "[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s", # noqa - 'datefmt': "%d/%b/%Y %H:%M:%S" + "version": 1, + "disable_existing_loggers": True, + "formatters": { + "standard": { + "format": "[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s", # noqa + "datefmt": "%d/%b/%Y %H:%M:%S", }, }, - 'handlers': { - 'console': { - 'level': 'ERROR', - 'class': 'logging.StreamHandler', - 'formatter': 'standard' + "handlers": { + "console": { + "level": "ERROR", + "class": "logging.StreamHandler", + "formatter": "standard", }, }, - 'loggers': { - 'swh.deposit': { - 'handlers': ['console'], - 'level': 'ERROR', - }, - } + "loggers": {"swh.deposit": {"handlers": ["console"], "level": "ERROR",},}, } # https://docs.djangoproject.com/en/1.11/ref/settings/#std:setting-MEDIA_ROOT # SECURITY WARNING: Override this in the production.py module -MEDIA_ROOT = '/tmp/swh-deposit/test/uploads/' +MEDIA_ROOT = "/tmp/swh-deposit/test/uploads/" FILE_UPLOAD_HANDLERS = [ "django.core.files.uploadhandler.MemoryFileUploadHandler", ] diff --git a/swh/deposit/tests/api/conftest.py b/swh/deposit/tests/api/conftest.py index b348b291..1f5f779a 100644 --- a/swh/deposit/tests/api/conftest.py +++ b/swh/deposit/tests/api/conftest.py @@ -1,84 +1,87 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib import pytest from django.urls import reverse from swh.deposit.config import ( - DEPOSIT_STATUS_DEPOSITED, COL_IRI, DEPOSIT_STATUS_VERIFIED + DEPOSIT_STATUS_DEPOSITED, + COL_IRI, + DEPOSIT_STATUS_VERIFIED, ) from swh.deposit.models import Deposit from swh.deposit.parsers import parse_xml from swh.deposit.api.private.deposit_check import SWHChecksDeposit @pytest.fixture def ready_deposit_ok(partial_deposit_with_metadata): """Returns a deposit ready for checks (it will pass the checks). """ deposit = partial_deposit_with_metadata deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return deposit @pytest.fixture def ready_deposit_verified(partial_deposit_with_metadata): """Returns a deposit ready for checks (it will pass the checks). """ deposit = partial_deposit_with_metadata deposit.status = DEPOSIT_STATUS_VERIFIED deposit.save() return deposit @pytest.fixture def ready_deposit_only_metadata(partial_deposit_only_metadata): """Deposit in status ready that will fail the checks (because missing archive). """ deposit = partial_deposit_only_metadata deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return deposit @pytest.fixture def ready_deposit_invalid_archive(authenticated_client, deposit_collection): url = reverse(COL_IRI, args=[deposit_collection.name]) - data = b'some data which is clearly not a zip file' + data = b"some data which is clearly not a zip file" md5sum = hashlib.md5(data).hexdigest() # when response = authenticated_client.post( url, - content_type='application/zip', # as zip + content_type="application/zip", # as zip data=data, # + headers CONTENT_LENGTH=len(data), # other headers needs HTTP_ prefix to be taken into account - HTTP_SLUG='external-id-invalid', + HTTP_SLUG="external-id-invalid", HTTP_CONTENT_MD5=md5sum, - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", + ) response_content = parse_xml(response.content) - deposit_id = int(response_content['deposit_id']) + deposit_id = int(response_content["deposit_id"]) deposit = Deposit.objects.get(pk=deposit_id) deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return deposit @pytest.fixture def swh_checks_deposit(): return SWHChecksDeposit() diff --git a/swh/deposit/tests/api/test_converters.py b/swh/deposit/tests/api/test_converters.py index 6802ba1c..fe16776e 100644 --- a/swh/deposit/tests/api/test_converters.py +++ b/swh/deposit/tests/api/test_converters.py @@ -1,123 +1,95 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.deposit.api.converters import convert_status_detail def test_convert_status_detail_empty(): - for status_detail in [{}, {'dummy-keys': []}, None]: + for status_detail in [{}, {"dummy-keys": []}, None]: assert convert_status_detail(status_detail) is None def test_convert_status_detail(): status_detail = { - 'url': { - 'summary': "At least one url field must be compatible with the client\'s domain name. The following url fields failed the check", # noqa - 'fields': ['blahurl', 'testurl'], + "url": { + "summary": "At least one url field must be compatible with the client's domain name. The following url fields failed the check", # noqa + "fields": ["blahurl", "testurl"], }, - 'metadata': [ + "metadata": [ + {"summary": "Mandatory fields missing", "fields": ["url", "title"],}, { - 'summary': 'Mandatory fields missing', - 'fields': ['url', 'title'], + "summary": "Alternate fields missing", + "fields": ["name or title", "url or badurl"], }, - { - 'summary': 'Alternate fields missing', - 'fields': ['name or title', 'url or badurl'] - } ], - 'archive': [{ - 'summary': 'Unreadable archive', - 'fields': ['1'], - }], + "archive": [{"summary": "Unreadable archive", "fields": ["1"],}], } - expected_status_detail = '''- Mandatory fields missing (url, title) + expected_status_detail = """- Mandatory fields missing (url, title) - Alternate fields missing (name or title, url or badurl) - Unreadable archive (1) - At least one url field must be compatible with the client's domain name. The following url fields failed the check (blahurl, testurl) -''' # noqa +""" # noqa actual_status_detail = convert_status_detail(status_detail) assert actual_status_detail == expected_status_detail def test_convert_status_detail_2(): status_detail = { - 'url': { - 'summary': 'At least one compatible url field. Failed', - 'fields': ['testurl'], + "url": { + "summary": "At least one compatible url field. Failed", + "fields": ["testurl"], }, - 'metadata': [ - { - 'summary': 'Mandatory fields missing', - 'fields': ['name'], - }, - ], - 'archive': [ - { - 'summary': 'Invalid archive', - 'fields': ['2'], - }, - { - 'summary': 'Unsupported archive', - 'fields': ['1'], - } + "metadata": [{"summary": "Mandatory fields missing", "fields": ["name"],},], + "archive": [ + {"summary": "Invalid archive", "fields": ["2"],}, + {"summary": "Unsupported archive", "fields": ["1"],}, ], } - expected_status_detail = '''- Mandatory fields missing (name) + expected_status_detail = """- Mandatory fields missing (name) - Invalid archive (2) - Unsupported archive (1) - At least one compatible url field. Failed (testurl) -''' +""" actual_status_detail = convert_status_detail(status_detail) assert actual_status_detail == expected_status_detail def test_convert_status_detail_3(): status_detail = { - 'url': { - 'summary': 'At least one compatible url field', - }, + "url": {"summary": "At least one compatible url field",}, } - expected_status_detail = '- At least one compatible url field\n' + expected_status_detail = "- At least one compatible url field\n" actual_status_detail = convert_status_detail(status_detail) assert actual_status_detail == expected_status_detail def test_convert_status_detail_edge_case(): status_detail = { - 'url': { - 'summary': 'At least one compatible url field. Failed', - 'fields': ['testurl'], + "url": { + "summary": "At least one compatible url field. Failed", + "fields": ["testurl"], }, - 'metadata': [ - { - 'summary': 'Mandatory fields missing', - 'fields': ['9', 10, 1.212], - }, + "metadata": [ + {"summary": "Mandatory fields missing", "fields": ["9", 10, 1.212],}, ], - 'archive': [ - { - 'summary': 'Invalid archive', - 'fields': ['3'], - }, - { - 'summary': 'Unsupported archive', - 'fields': [2], - } + "archive": [ + {"summary": "Invalid archive", "fields": ["3"],}, + {"summary": "Unsupported archive", "fields": [2],}, ], } - expected_status_detail = '''- Mandatory fields missing (9, 10, 1.212) + expected_status_detail = """- Mandatory fields missing (9, 10, 1.212) - Invalid archive (3) - Unsupported archive (2) - At least one compatible url field. Failed (testurl) -''' +""" actual_status_detail = convert_status_detail(status_detail) assert actual_status_detail == expected_status_detail diff --git a/swh/deposit/tests/api/test_deposit.py b/swh/deposit/tests/api/test_deposit.py index dfced699..2e6cce7b 100644 --- a/swh/deposit/tests/api/test_deposit.py +++ b/swh/deposit/tests/api/test_deposit.py @@ -1,189 +1,195 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib from django.urls import reverse from io import BytesIO from rest_framework import status from swh.deposit.config import ( - COL_IRI, EDIT_SE_IRI, DEPOSIT_STATUS_REJECTED, - DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS, - DEPOSIT_STATUS_LOAD_FAILURE + COL_IRI, + EDIT_SE_IRI, + DEPOSIT_STATUS_REJECTED, + DEPOSIT_STATUS_PARTIAL, + DEPOSIT_STATUS_LOAD_SUCCESS, + DEPOSIT_STATUS_LOAD_FAILURE, ) from swh.deposit.models import Deposit from swh.deposit.parsers import parse_xml def test_deposit_post_will_fail_with_401(client): """Without authentication, endpoint refuses access with 401 response """ - url = reverse(COL_IRI, args=['hal']) + url = reverse(COL_IRI, args=["hal"]) response = client.post(url) assert response.status_code == status.HTTP_401_UNAUTHORIZED def test_access_to_another_user_collection_is_forbidden( - authenticated_client, deposit_another_collection, deposit_user): + authenticated_client, deposit_another_collection, deposit_user +): """Access to another user collection should return a 403 """ coll2 = deposit_another_collection url = reverse(COL_IRI, args=[coll2.name]) response = authenticated_client.post(url) assert response.status_code == status.HTTP_403_FORBIDDEN - msg = 'Client %s cannot access collection %s' % ( - deposit_user.username, coll2.name, ) - assert msg in response.content.decode('utf-8') + msg = "Client %s cannot access collection %s" % (deposit_user.username, coll2.name,) + assert msg in response.content.decode("utf-8") -def test_delete_on_col_iri_not_supported( - authenticated_client, deposit_collection): +def test_delete_on_col_iri_not_supported(authenticated_client, deposit_collection): """Delete on col iri should return a 405 response """ url = reverse(COL_IRI, args=[deposit_collection.name]) response = authenticated_client.delete(url) assert response.status_code == status.HTTP_405_METHOD_NOT_ALLOWED - assert 'DELETE method is not supported on this endpoint' in \ - response.content.decode('utf-8') + assert "DELETE method is not supported on this endpoint" in response.content.decode( + "utf-8" + ) -def create_deposit_with_rejection_status( - authenticated_client, deposit_collection): +def create_deposit_with_rejection_status(authenticated_client, deposit_collection): url = reverse(COL_IRI, args=[deposit_collection.name]) - data = b'some data which is clearly not a zip file' + data = b"some data which is clearly not a zip file" md5sum = hashlib.md5(data).hexdigest() - external_id = 'some-external-id-1' + external_id = "some-external-id-1" # when response = authenticated_client.post( url, - content_type='application/zip', # as zip + content_type="application/zip", # as zip data=data, # + headers CONTENT_LENGTH=len(data), # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, HTTP_CONTENT_MD5=md5sum, - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", + ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - actual_state = response_content['deposit_status'] + actual_state = response_content["deposit_status"] assert actual_state == DEPOSIT_STATUS_REJECTED def test_act_on_deposit_rejected_is_not_permitted( - authenticated_client, deposit_collection, rejected_deposit, - atom_dataset): + authenticated_client, deposit_collection, rejected_deposit, atom_dataset +): deposit = rejected_deposit response = authenticated_client.post( reverse(EDIT_SE_IRI, args=[deposit.collection.name, deposit.id]), - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data1'], - HTTP_SLUG=deposit.external_id) + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data1"], + HTTP_SLUG=deposit.external_id, + ) assert response.status_code == status.HTTP_400_BAD_REQUEST - msg = 'You can only act on deposit with status '%s'' % ( - DEPOSIT_STATUS_PARTIAL, ) - assert msg in response.content.decode('utf-8') + msg = "You can only act on deposit with status '%s'" % ( + DEPOSIT_STATUS_PARTIAL, + ) + assert msg in response.content.decode("utf-8") def test_add_deposit_when_partial_makes_new_deposit( - authenticated_client, deposit_collection, partial_deposit, - atom_dataset): + authenticated_client, deposit_collection, partial_deposit, atom_dataset +): """Posting deposit on collection when previous is partial makes new deposit """ deposit = partial_deposit assert deposit.status == DEPOSIT_STATUS_PARTIAL # adding a new deposit with the same external id response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data0'] % deposit.external_id, - HTTP_SLUG=deposit.external_id + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data0"] % deposit.external_id, + HTTP_SLUG=deposit.external_id, ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] assert deposit_id != deposit.id # new deposit new_deposit = Deposit.objects.get(pk=deposit_id) assert new_deposit != deposit assert new_deposit.parent is None def test_add_deposit_when_failed_makes_new_deposit_with_no_parent( - authenticated_client, deposit_collection, failed_deposit, - atom_dataset): + authenticated_client, deposit_collection, failed_deposit, atom_dataset +): """Posting deposit on collection when deposit done makes new deposit with parent """ deposit = failed_deposit assert deposit.status == DEPOSIT_STATUS_LOAD_FAILURE # adding a new deposit with the same external id as a completed deposit # creates the parenting chain response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data0'] % deposit.external_id, - HTTP_SLUG=deposit.external_id) + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data0"] % deposit.external_id, + HTTP_SLUG=deposit.external_id, + ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] assert deposit_id != deposit.id new_deposit = Deposit.objects.get(pk=deposit_id) assert new_deposit != deposit assert new_deposit.parent is None def test_add_deposit_when_done_makes_new_deposit_with_parent_old_one( - authenticated_client, deposit_collection, completed_deposit, - atom_dataset): + authenticated_client, deposit_collection, completed_deposit, atom_dataset +): """Posting deposit on collection when deposit done makes new deposit with parent """ # given multiple deposit already loaded deposit = completed_deposit assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS # adding a new deposit with the same external id as a completed deposit # creates the parenting chain response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data0'] % deposit.external_id, - HTTP_SLUG=deposit.external_id + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data0"] % deposit.external_id, + HTTP_SLUG=deposit.external_id, ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] assert deposit_id != deposit.id new_deposit = Deposit.objects.get(pk=deposit_id) assert deposit.collection == new_deposit.collection assert deposit.external_id == new_deposit.external_id assert new_deposit != deposit assert new_deposit.parent == deposit diff --git a/swh/deposit/tests/api/test_deposit_atom.py b/swh/deposit/tests/api/test_deposit_atom.py index 7869133c..2de803d7 100644 --- a/swh/deposit/tests/api/test_deposit_atom.py +++ b/swh/deposit/tests/api/test_deposit_atom.py @@ -1,312 +1,326 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from django.urls import reverse from io import BytesIO from rest_framework import status from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED from swh.deposit.models import Deposit, DepositRequest, DepositCollection from swh.deposit.parsers import parse_xml def test_post_deposit_atom_201_even_with_decimal( - authenticated_client, deposit_collection, atom_dataset): + authenticated_client, deposit_collection, atom_dataset +): """Posting an initial atom entry should return 201 with deposit receipt """ - atom_error_with_decimal = atom_dataset['error-with-decimal'] + atom_error_with_decimal = atom_dataset["error-with-decimal"] response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), - content_type='application/atom+xml;type=entry', + content_type="application/atom+xml;type=entry", data=atom_error_with_decimal, - HTTP_SLUG='external-id', - HTTP_IN_PROGRESS='false') + HTTP_SLUG="external-id", + HTTP_IN_PROGRESS="false", + ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) dr = DepositRequest.objects.get(deposit=deposit) assert dr.metadata is not None - sw_version = dr.metadata.get('codemeta:softwareVersion') - assert sw_version == '10.4' + sw_version = dr.metadata.get("codemeta:softwareVersion") + assert sw_version == "10.4" def test_post_deposit_atom_400_with_empty_body( - authenticated_client, deposit_collection, atom_dataset): + authenticated_client, deposit_collection, atom_dataset +): """Posting empty body request should return a 400 response """ response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data-empty-body']) + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data-empty-body"], + ) assert response.status_code == status.HTTP_400_BAD_REQUEST def test_post_deposit_atom_400_badly_formatted_atom( - authenticated_client, deposit_collection, atom_dataset): + authenticated_client, deposit_collection, atom_dataset +): """Posting a badly formatted atom should return a 400 response """ response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data-badly-formatted']) + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data-badly-formatted"], + ) assert response.status_code == status.HTTP_400_BAD_REQUEST def test_post_deposit_atom_parsing_error( - authenticated_client, deposit_collection, atom_dataset): + authenticated_client, deposit_collection, atom_dataset +): """Posting parsing error prone atom should return 400 """ response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data-parsing-error-prone']) + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data-parsing-error-prone"], + ) assert response.status_code == status.HTTP_400_BAD_REQUEST def test_post_deposit_atom_no_slug_header( - authenticated_client, deposit_collection, atom_dataset): + authenticated_client, deposit_collection, atom_dataset +): """Posting an atom entry without a slug header should return a 400 """ url = reverse(COL_IRI, args=[deposit_collection.name]) # when response = authenticated_client.post( url, - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data0'], + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data0"], # + headers - HTTP_IN_PROGRESS='false') + HTTP_IN_PROGRESS="false", + ) - assert b'Missing SLUG header' in response.content + assert b"Missing SLUG header" in response.content assert response.status_code == status.HTTP_400_BAD_REQUEST -def test_post_deposit_atom_unknown_collection( - authenticated_client, atom_dataset): +def test_post_deposit_atom_unknown_collection(authenticated_client, atom_dataset): """Posting an atom entry to an unknown collection should return a 404 """ - unknown_collection = 'unknown-one' + unknown_collection = "unknown-one" with pytest.raises(DepositCollection.DoesNotExist): DepositCollection.objects.get(name=unknown_collection) response = authenticated_client.post( reverse(COL_IRI, args=[unknown_collection]), # <- unknown collection - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data0'], - HTTP_SLUG='something') + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data0"], + HTTP_SLUG="something", + ) assert response.status_code == status.HTTP_404_NOT_FOUND def test_post_deposit_atom_entry_initial( - authenticated_client, deposit_collection, atom_dataset): + authenticated_client, deposit_collection, atom_dataset +): """Posting an initial atom entry should return 201 with deposit receipt """ # given - external_id = 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a' + external_id = "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a" with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) - atom_entry_data = atom_dataset['entry-data0'] % external_id + atom_entry_data = atom_dataset["entry-data0"] % external_id # when response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), - content_type='application/atom+xml;type=entry', + content_type="application/atom+xml;type=entry", data=atom_entry_data, HTTP_SLUG=external_id, - HTTP_IN_PROGRESS='false') + HTTP_IN_PROGRESS="false", + ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.collection == deposit_collection assert deposit.external_id == external_id assert deposit.status == DEPOSIT_STATUS_DEPOSITED # one associated request to a deposit deposit_request = DepositRequest.objects.get(deposit=deposit) assert deposit_request.metadata is not None assert deposit_request.raw_metadata == atom_entry_data assert bool(deposit_request.archive) is False def test_post_deposit_atom_entry_with_codemeta( - authenticated_client, deposit_collection, atom_dataset): + authenticated_client, deposit_collection, atom_dataset +): """Posting an initial atom entry should return 201 with deposit receipt """ # given - external_id = 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a' + external_id = "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a" with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) - atom_entry_data = atom_dataset['codemeta-sample'] % external_id + atom_entry_data = atom_dataset["codemeta-sample"] % external_id # when response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), - content_type='application/atom+xml;type=entry', + content_type="application/atom+xml;type=entry", data=atom_entry_data, HTTP_SLUG=external_id, - HTTP_IN_PROGRESS='false') + HTTP_IN_PROGRESS="false", + ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.collection == deposit_collection assert deposit.external_id == external_id assert deposit.status == DEPOSIT_STATUS_DEPOSITED # one associated request to a deposit deposit_request = DepositRequest.objects.get(deposit=deposit) assert deposit_request.metadata is not None assert deposit_request.raw_metadata == atom_entry_data assert bool(deposit_request.archive) is False def test_post_deposit_atom_entry_tei( - authenticated_client, deposit_collection, atom_dataset): + authenticated_client, deposit_collection, atom_dataset +): """Posting initial atom entry as TEI should return 201 with receipt """ # given - external_id = 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a' + external_id = "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a" with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) - atom_entry_data = atom_dataset['tei-sample'] + atom_entry_data = atom_dataset["tei-sample"] # when response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), - content_type='application/atom+xml;type=entry', + content_type="application/atom+xml;type=entry", data=atom_entry_data, HTTP_SLUG=external_id, - HTTP_IN_PROGRESS='false') + HTTP_IN_PROGRESS="false", + ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.collection == deposit_collection assert deposit.external_id == external_id assert deposit.status == DEPOSIT_STATUS_DEPOSITED # one associated request to a deposit deposit_request = DepositRequest.objects.get(deposit=deposit) assert deposit_request.metadata is not None assert deposit_request.raw_metadata == atom_entry_data assert bool(deposit_request.archive) is False def test_post_deposit_atom_entry_multiple_steps( - authenticated_client, deposit_collection, atom_dataset): + authenticated_client, deposit_collection, atom_dataset +): """After initial deposit, updating a deposit should return a 201 """ # given - external_id = 'urn:uuid:2225c695-cfb8-4ebb-aaaa-80da344efa6a' + external_id = "urn:uuid:2225c695-cfb8-4ebb-aaaa-80da344efa6a" with pytest.raises(Deposit.DoesNotExist): deposit = Deposit.objects.get(external_id=external_id) # when response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data1'], - HTTP_IN_PROGRESS='True', - HTTP_SLUG=external_id) + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data1"], + HTTP_IN_PROGRESS="True", + HTTP_SLUG=external_id, + ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = int(response_content['deposit_id']) + deposit_id = int(response_content["deposit_id"]) deposit = Deposit.objects.get(pk=deposit_id) assert deposit.collection == deposit_collection assert deposit.external_id == external_id - assert deposit.status == 'partial' + assert deposit.status == "partial" # one associated request to a deposit deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert len(deposit_requests) == 1 - atom_entry_data = atom_dataset['entry-data-minimal'] % external_id.encode('utf-8') # noqa + atom_entry_data = atom_dataset["entry-data-minimal"] % external_id.encode( + "utf-8" + ) # noqa - update_uri = response._headers['location'][1] + update_uri = response._headers["location"][1] # when updating the first deposit post response = authenticated_client.post( update_uri, - content_type='application/atom+xml;type=entry', + content_type="application/atom+xml;type=entry", data=atom_entry_data, - HTTP_IN_PROGRESS='False') + HTTP_IN_PROGRESS="False", + ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = int(response_content['deposit_id']) + deposit_id = int(response_content["deposit_id"]) deposit = Deposit.objects.get(pk=deposit_id) assert deposit.collection == deposit_collection assert deposit.external_id == external_id assert deposit.status == DEPOSIT_STATUS_DEPOSITED assert len(Deposit.objects.all()) == 1 # now 2 associated requests to a same deposit - deposit_requests = DepositRequest.objects.filter( - deposit=deposit).order_by('id') + deposit_requests = DepositRequest.objects.filter(deposit=deposit).order_by("id") assert len(deposit_requests) == 2 - atom_entry_data1 = atom_dataset['entry-data1'] + atom_entry_data1 = atom_dataset["entry-data1"] expected_meta = [ - { - 'metadata': parse_xml(atom_entry_data1), - 'raw_metadata': atom_entry_data1 - }, - { - 'metadata': parse_xml(atom_entry_data), - 'raw_metadata': atom_entry_data - } + {"metadata": parse_xml(atom_entry_data1), "raw_metadata": atom_entry_data1}, + {"metadata": parse_xml(atom_entry_data), "raw_metadata": atom_entry_data}, ] for i, deposit_request in enumerate(deposit_requests): actual_metadata = deposit_request.metadata - assert actual_metadata == expected_meta[i]['metadata'] - assert deposit_request.raw_metadata == expected_meta[i]['raw_metadata'] + assert actual_metadata == expected_meta[i]["metadata"] + assert deposit_request.raw_metadata == expected_meta[i]["raw_metadata"] assert bool(deposit_request.archive) is False diff --git a/swh/deposit/tests/api/test_deposit_binary.py b/swh/deposit/tests/api/test_deposit_binary.py index 5bcca36a..eced5d17 100644 --- a/swh/deposit/tests/api/test_deposit_binary.py +++ b/swh/deposit/tests/api/test_deposit_binary.py @@ -1,543 +1,567 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from django.core.files.uploadedfile import InMemoryUploadedFile from django.urls import reverse from io import BytesIO from rest_framework import status from swh.deposit.config import ( - COL_IRI, EM_IRI, DEPOSIT_STATUS_DEPOSITED, + COL_IRI, + EM_IRI, + DEPOSIT_STATUS_DEPOSITED, ) from swh.deposit.models import Deposit, DepositRequest from swh.deposit.parsers import parse_xml from swh.deposit.tests.common import create_arborescence_archive, check_archive def test_post_deposit_binary_no_slug( - authenticated_client, deposit_collection, sample_archive): + authenticated_client, deposit_collection, sample_archive +): """Posting a binary deposit without slug header should return 400 """ url = reverse(COL_IRI, args=[deposit_collection.name]) # when response = authenticated_client.post( url, - content_type='application/zip', # as zip - data=sample_archive['data'], + content_type="application/zip", # as zip + data=sample_archive["data"], # + headers - CONTENT_LENGTH=sample_archive['length'], - HTTP_CONTENT_MD5=sample_archive['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false', - HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') - - assert b'Missing SLUG header' in response.content + CONTENT_LENGTH=sample_archive["length"], + HTTP_CONTENT_MD5=sample_archive["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", + ) + + assert b"Missing SLUG header" in response.content assert response.status_code == status.HTTP_400_BAD_REQUEST def test_post_deposit_binary_support( - authenticated_client, deposit_collection, sample_archive): + authenticated_client, deposit_collection, sample_archive +): """Binary upload with content-type not in [zip,x-tar] should return 415 """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) - external_id = 'some-external-id-1' + external_id = "some-external-id-1" # when response = authenticated_client.post( url, - content_type='application/octet-stream', - data=sample_archive['data'], + content_type="application/octet-stream", + data=sample_archive["data"], # + headers - CONTENT_LENGTH=sample_archive['length'], + CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=sample_archive['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false', - HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') + HTTP_CONTENT_MD5=sample_archive["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", + ) # then assert response.status_code == status.HTTP_415_UNSUPPORTED_MEDIA_TYPE with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_binary_upload_ok( - authenticated_client, deposit_collection, sample_archive): + authenticated_client, deposit_collection, sample_archive +): """Binary upload with correct headers should return 201 with receipt """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) - external_id = 'some-external-id-1' + external_id = "some-external-id-1" # when response = authenticated_client.post( url, - content_type='application/zip', # as zip - data=sample_archive['data'], + content_type="application/zip", # as zip + data=sample_archive["data"], # + headers - CONTENT_LENGTH=sample_archive['length'], + CONTENT_LENGTH=sample_archive["length"], # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=sample_archive['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false', - HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( - sample_archive['name'], )) + HTTP_CONTENT_MD5=sample_archive["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (sample_archive["name"],), + ) # then response_content = parse_xml(BytesIO(response.content)) assert response.status_code == status.HTTP_201_CREATED - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_DEPOSITED assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swh_id is None deposit_request = DepositRequest.objects.get(deposit=deposit) - check_archive(sample_archive['name'], deposit_request.archive.name) + check_archive(sample_archive["name"], deposit_request.archive.name) assert deposit_request.metadata is None assert deposit_request.raw_metadata is None response_content = parse_xml(BytesIO(response.content)) - assert response_content['deposit_archive'] == sample_archive['name'] - assert int(response_content['deposit_id']) == deposit.id - assert response_content['deposit_status'] == deposit.status + assert response_content["deposit_archive"] == sample_archive["name"] + assert int(response_content["deposit_id"]) == deposit.id + assert response_content["deposit_status"] == deposit.status - edit_se_iri = reverse('edit_se_iri', - args=[deposit_collection.name, deposit.id]) + edit_se_iri = reverse("edit_se_iri", args=[deposit_collection.name, deposit.id]) - assert response._headers['location'] == ( - 'Location', 'http://testserver' + edit_se_iri) + assert response._headers["location"] == ( + "Location", + "http://testserver" + edit_se_iri, + ) def test_post_deposit_binary_failure_unsupported_packaging_header( - authenticated_client, deposit_collection, sample_archive): + authenticated_client, deposit_collection, sample_archive +): """Bin deposit without supported content_disposition header returns 400 """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) - external_id = 'some-external-id' + external_id = "some-external-id" # when response = authenticated_client.post( url, - content_type='application/zip', - data=sample_archive['data'], + content_type="application/zip", + data=sample_archive["data"], # + headers - CONTENT_LENGTH=sample_archive['length'], + CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=sample_archive['md5sum'], - HTTP_PACKAGING='something-unsupported', - HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') + HTTP_CONTENT_MD5=sample_archive["md5sum"], + HTTP_PACKAGING="something-unsupported", + HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", + ) # then assert response.status_code == status.HTTP_400_BAD_REQUEST with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_binary_upload_no_content_disposition_header( - authenticated_client, deposit_collection, sample_archive): + authenticated_client, deposit_collection, sample_archive +): """Binary upload without content_disposition header should return 400 """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) - external_id = 'some-external-id' + external_id = "some-external-id" # when response = authenticated_client.post( url, - content_type='application/zip', - data=sample_archive['data'], + content_type="application/zip", + data=sample_archive["data"], # + headers - CONTENT_LENGTH=sample_archive['length'], + CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=sample_archive['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false') + HTTP_CONTENT_MD5=sample_archive["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + ) # then assert response.status_code == status.HTTP_400_BAD_REQUEST with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_mediation_not_supported( - authenticated_client, deposit_collection, sample_archive): + authenticated_client, deposit_collection, sample_archive +): """Binary upload with mediation should return a 412 response """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) - external_id = 'some-external-id-1' + external_id = "some-external-id-1" # when response = authenticated_client.post( url, - content_type='application/zip', - data=sample_archive['data'], + content_type="application/zip", + data=sample_archive["data"], # + headers - CONTENT_LENGTH=sample_archive['length'], + CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=sample_archive['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false', - HTTP_ON_BEHALF_OF='someone', - HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') + HTTP_CONTENT_MD5=sample_archive["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + HTTP_ON_BEHALF_OF="someone", + HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", + ) # then assert response.status_code == status.HTTP_412_PRECONDITION_FAILED with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_binary_upload_fail_if_upload_size_limit_exceeded( - authenticated_client, deposit_collection, sample_archive, tmp_path): + authenticated_client, deposit_collection, sample_archive, tmp_path +): """Binary upload must not exceed the limit set up... """ tmp_path = str(tmp_path) url = reverse(COL_IRI, args=[deposit_collection.name]) archive = create_arborescence_archive( - tmp_path, 'archive2', 'file2', b'some content in file', - up_to_size=500) + tmp_path, "archive2", "file2", b"some content in file", up_to_size=500 + ) - external_id = 'some-external-id' + external_id = "some-external-id" # when response = authenticated_client.post( url, - content_type='application/zip', - data=archive['data'], + content_type="application/zip", + data=archive["data"], # + headers - CONTENT_LENGTH=archive['length'], + CONTENT_LENGTH=archive["length"], HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=archive['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false', - HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') + HTTP_CONTENT_MD5=archive["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", + ) # then assert response.status_code == status.HTTP_413_REQUEST_ENTITY_TOO_LARGE - assert b'Upload size limit exceeded' in response.content + assert b"Upload size limit exceeded" in response.content with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_2_post_2_different_deposits( - authenticated_client, deposit_collection, sample_archive): + authenticated_client, deposit_collection, sample_archive +): """2 posting deposits should return 2 different 201 with receipt """ url = reverse(COL_IRI, args=[deposit_collection.name]) # when response = authenticated_client.post( url, - content_type='application/zip', # as zip - data=sample_archive['data'], + content_type="application/zip", # as zip + data=sample_archive["data"], # + headers - CONTENT_LENGTH=sample_archive['length'], - HTTP_SLUG='some-external-id-1', - HTTP_CONTENT_MD5=sample_archive['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false', - HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') + CONTENT_LENGTH=sample_archive["length"], + HTTP_SLUG="some-external-id-1", + HTTP_CONTENT_MD5=sample_archive["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", + ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) deposits = Deposit.objects.all() assert len(deposits) == 1 assert deposits[0] == deposit # second post response = authenticated_client.post( url, - content_type='application/x-tar', # as zip - data=sample_archive['data'], + content_type="application/x-tar", # as zip + data=sample_archive["data"], # + headers - CONTENT_LENGTH=sample_archive['length'], - HTTP_SLUG='another-external-id', - HTTP_CONTENT_MD5=sample_archive['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false', - HTTP_CONTENT_DISPOSITION='attachment; filename=filename1') + CONTENT_LENGTH=sample_archive["length"], + HTTP_SLUG="another-external-id", + HTTP_CONTENT_MD5=sample_archive["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + HTTP_CONTENT_DISPOSITION="attachment; filename=filename1", + ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id2 = response_content['deposit_id'] + deposit_id2 = response_content["deposit_id"] deposit2 = Deposit.objects.get(pk=deposit_id2) assert deposit != deposit2 - deposits = Deposit.objects.all().order_by('id') + deposits = Deposit.objects.all().order_by("id") assert len(deposits) == 2 assert list(deposits), [deposit == deposit2] def test_post_deposit_binary_and_post_to_add_another_archive( - authenticated_client, deposit_collection, sample_archive, tmp_path): + authenticated_client, deposit_collection, sample_archive, tmp_path +): """Updating a deposit should return a 201 with receipt """ tmp_path = str(tmp_path) url = reverse(COL_IRI, args=[deposit_collection.name]) - external_id = 'some-external-id-1' + external_id = "some-external-id-1" # when response = authenticated_client.post( url, - content_type='application/zip', # as zip - data=sample_archive['data'], + content_type="application/zip", # as zip + data=sample_archive["data"], # + headers - CONTENT_LENGTH=sample_archive['length'], + CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=sample_archive['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='true', - HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( - sample_archive['name'], )) + HTTP_CONTENT_MD5=sample_archive["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="true", + HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (sample_archive["name"],), + ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) - assert deposit.status == 'partial' + assert deposit.status == "partial" assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swh_id is None deposit_request = DepositRequest.objects.get(deposit=deposit) assert deposit_request.deposit == deposit - assert deposit_request.type == 'archive' - check_archive(sample_archive['name'], deposit_request.archive.name) + assert deposit_request.type == "archive" + check_archive(sample_archive["name"], deposit_request.archive.name) # 2nd archive to upload archive2 = create_arborescence_archive( - tmp_path, 'archive2', 'file2', b'some other content in file') + tmp_path, "archive2", "file2", b"some other content in file" + ) # uri to update the content update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit_id]) # adding another archive for the deposit and finalizing it response = authenticated_client.post( update_uri, - content_type='application/zip', # as zip - data=archive2['data'], + content_type="application/zip", # as zip + data=archive2["data"], # + headers - CONTENT_LENGTH=archive2['length'], + CONTENT_LENGTH=archive2["length"], HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=archive2['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( - archive2['name'])) + HTTP_CONTENT_MD5=archive2["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"]), + ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_DEPOSITED assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swh_id is None - deposit_requests = list(DepositRequest.objects.filter(deposit=deposit). - order_by('id')) + deposit_requests = list( + DepositRequest.objects.filter(deposit=deposit).order_by("id") + ) # 2 deposit requests for the same deposit assert len(deposit_requests) == 2 assert deposit_requests[0].deposit == deposit - assert deposit_requests[0].type == 'archive' - check_archive(sample_archive['name'], deposit_requests[0].archive.name) + assert deposit_requests[0].type == "archive" + check_archive(sample_archive["name"], deposit_requests[0].archive.name) assert deposit_requests[1].deposit == deposit - assert deposit_requests[1].type == 'archive' - check_archive(archive2['name'], deposit_requests[1].archive.name) + assert deposit_requests[1].type == "archive" + check_archive(archive2["name"], deposit_requests[1].archive.name) # only 1 deposit in db deposits = Deposit.objects.all() assert len(deposits) == 1 def test_post_deposit_then_update_refused( - authenticated_client, deposit_collection, - sample_archive, atom_dataset, tmp_path): + authenticated_client, deposit_collection, sample_archive, atom_dataset, tmp_path +): """Updating a deposit with status 'ready' should return a 400 """ tmp_path = str(tmp_path) url = reverse(COL_IRI, args=[deposit_collection.name]) - external_id = 'some-external-id-1' + external_id = "some-external-id-1" # when response = authenticated_client.post( url, - content_type='application/zip', # as zip - data=sample_archive['data'], + content_type="application/zip", # as zip + data=sample_archive["data"], # + headers - CONTENT_LENGTH=sample_archive['length'], + CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=sample_archive['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false', - HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') + HTTP_CONTENT_MD5=sample_archive["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", + ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_DEPOSITED assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swh_id is None deposit_request = DepositRequest.objects.get(deposit=deposit) assert deposit_request.deposit == deposit - check_archive('filename0', deposit_request.archive.name) + check_archive("filename0", deposit_request.archive.name) # updating/adding is forbidden # uri to update the content - edit_se_iri = reverse( - 'edit_se_iri', args=[deposit_collection.name, deposit_id]) - em_iri = reverse( - 'em_iri', args=[deposit_collection.name, deposit_id]) + edit_se_iri = reverse("edit_se_iri", args=[deposit_collection.name, deposit_id]) + em_iri = reverse("em_iri", args=[deposit_collection.name, deposit_id]) # Testing all update/add endpoint should fail # since the status is ready archive2 = create_arborescence_archive( - tmp_path, 'archive2', 'file2', b'some content in file 2') + tmp_path, "archive2", "file2", b"some content in file 2" + ) # replacing file is no longer possible since the deposit's # status is ready r = authenticated_client.put( em_iri, - content_type='application/zip', - data=archive2['data'], - CONTENT_LENGTH=archive2['length'], + content_type="application/zip", + data=archive2["data"], + CONTENT_LENGTH=archive2["length"], HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=archive2['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false', - HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') + HTTP_CONTENT_MD5=archive2["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", + ) assert r.status_code == status.HTTP_400_BAD_REQUEST # adding file is no longer possible since the deposit's status # is ready r = authenticated_client.post( em_iri, - content_type='application/zip', - data=archive2['data'], - CONTENT_LENGTH=archive2['length'], + content_type="application/zip", + data=archive2["data"], + CONTENT_LENGTH=archive2["length"], HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=archive2['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false', - HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') + HTTP_CONTENT_MD5=archive2["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", + ) assert r.status_code == status.HTTP_400_BAD_REQUEST # replacing metadata is no longer possible since the deposit's # status is ready r = authenticated_client.put( edit_se_iri, - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data-deposit-binary'], - CONTENT_LENGTH=len(atom_dataset['entry-data-deposit-binary']), - HTTP_SLUG=external_id) + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data-deposit-binary"], + CONTENT_LENGTH=len(atom_dataset["entry-data-deposit-binary"]), + HTTP_SLUG=external_id, + ) assert r.status_code == status.HTTP_400_BAD_REQUEST # adding new metadata is no longer possible since the # deposit's status is ready r = authenticated_client.post( edit_se_iri, - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data-deposit-binary'], - CONTENT_LENGTH=len(atom_dataset['entry-data-deposit-binary']), - HTTP_SLUG=external_id) + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data-deposit-binary"], + CONTENT_LENGTH=len(atom_dataset["entry-data-deposit-binary"]), + HTTP_SLUG=external_id, + ) assert r.status_code == status.HTTP_400_BAD_REQUEST - archive_content = b'some content representing archive' + archive_content = b"some content representing archive" archive = InMemoryUploadedFile( BytesIO(archive_content), - field_name='archive0', - name='archive0', - content_type='application/zip', + field_name="archive0", + name="archive0", + content_type="application/zip", size=len(archive_content), - charset=None) + charset=None, + ) atom_entry = InMemoryUploadedFile( - BytesIO(atom_dataset['entry-data-deposit-binary'].encode('utf-8')), - field_name='atom0', - name='atom0', + BytesIO(atom_dataset["entry-data-deposit-binary"].encode("utf-8")), + field_name="atom0", + name="atom0", content_type='application/atom+xml; charset="utf-8"', - size=len(atom_dataset['entry-data-deposit-binary']), - charset='utf-8') + size=len(atom_dataset["entry-data-deposit-binary"]), + charset="utf-8", + ) # replacing multipart metadata is no longer possible since the # deposit's status is ready r = authenticated_client.put( edit_se_iri, - format='multipart', - data={ - 'archive': archive, - 'atom_entry': atom_entry, - }) + format="multipart", + data={"archive": archive, "atom_entry": atom_entry,}, + ) assert r.status_code == status.HTTP_400_BAD_REQUEST # adding new metadata is no longer possible since the # deposit's status is ready r = authenticated_client.post( edit_se_iri, - format='multipart', - data={ - 'archive': archive, - 'atom_entry': atom_entry, - }) + format="multipart", + data={"archive": archive, "atom_entry": atom_entry,}, + ) assert r.status_code == status.HTTP_400_BAD_REQUEST diff --git a/swh/deposit/tests/api/test_deposit_delete.py b/swh/deposit/tests/api/test_deposit_delete.py index 27cec1b3..496af061 100644 --- a/swh/deposit/tests/api/test_deposit_delete.py +++ b/swh/deposit/tests/api/test_deposit_delete.py @@ -1,121 +1,123 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import defaultdict from django.urls import reverse from rest_framework import status from typing import Dict, Mapping from swh.deposit.config import ( - EDIT_SE_IRI, EM_IRI, ARCHIVE_KEY, METADATA_KEY, - DEPOSIT_STATUS_DEPOSITED + EDIT_SE_IRI, + EM_IRI, + ARCHIVE_KEY, + METADATA_KEY, + DEPOSIT_STATUS_DEPOSITED, ) from swh.deposit.models import Deposit, DepositRequest def count_deposit_request_types(deposit_requests) -> Mapping[str, int]: deposit_request_types = defaultdict(int) # type: Dict[str, int] for dr in deposit_requests: deposit_request_types[dr.type] += 1 return deposit_request_types def test_delete_archive_on_partial_deposit_works( - authenticated_client, partial_deposit_with_metadata, - deposit_collection): + authenticated_client, partial_deposit_with_metadata, deposit_collection +): """Removing partial deposit's archive should return a 204 response """ deposit_id = partial_deposit_with_metadata.id deposit = Deposit.objects.get(pk=deposit_id) deposit_requests = DepositRequest.objects.filter(deposit=deposit) # deposit request type: 'archive', 1 'metadata' deposit_request_types = count_deposit_request_types(deposit_requests) - assert deposit_request_types == { - ARCHIVE_KEY: 1, - METADATA_KEY: 1 - } + assert deposit_request_types == {ARCHIVE_KEY: 1, METADATA_KEY: 1} # when update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit_id]) response = authenticated_client.delete(update_uri) # then assert response.status_code == status.HTTP_204_NO_CONTENT deposit = Deposit.objects.get(pk=deposit_id) deposit_requests2 = DepositRequest.objects.filter(deposit=deposit) deposit_request_types = count_deposit_request_types(deposit_requests2) - assert deposit_request_types == { - METADATA_KEY: 1 - } + assert deposit_request_types == {METADATA_KEY: 1} def test_delete_archive_on_undefined_deposit_fails( - authenticated_client, deposit_collection, sample_archive): + authenticated_client, deposit_collection, sample_archive +): """Delete undefined deposit returns a 404 response """ # when update_uri = reverse(EM_IRI, args=[deposit_collection.name, 999]) response = authenticated_client.delete(update_uri) # then assert response.status_code == status.HTTP_404_NOT_FOUND def test_delete_non_partial_deposit( - authenticated_client, deposit_collection, deposited_deposit): + authenticated_client, deposit_collection, deposited_deposit +): """Delete !partial status deposit should return a 400 response """ deposit = deposited_deposit assert deposit.status == DEPOSIT_STATUS_DEPOSITED # when update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.delete(update_uri) # then assert response.status_code == status.HTTP_400_BAD_REQUEST deposit = Deposit.objects.get(pk=deposit.id) assert deposit is not None def test_delete_partial_deposit( - authenticated_client, deposit_collection, partial_deposit): + authenticated_client, deposit_collection, partial_deposit +): """Delete deposit should return a 204 response """ # given deposit = partial_deposit # when url = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.delete(url) # then assert response.status_code == status.HTTP_204_NO_CONTENT deposit_requests = list(DepositRequest.objects.filter(deposit=deposit)) assert deposit_requests == [] deposits = list(Deposit.objects.filter(pk=deposit.id)) assert deposits == [] def test_delete_on_edit_se_iri_cannot_delete_non_partial_deposit( - authenticated_client, deposit_collection, complete_deposit): + authenticated_client, deposit_collection, complete_deposit +): """Delete !partial deposit should return a 400 response """ # given deposit = complete_deposit # when url = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.delete(url) # then assert response.status_code == status.HTTP_400_BAD_REQUEST deposit = Deposit.objects.get(pk=deposit.id) assert deposit is not None diff --git a/swh/deposit/tests/api/test_deposit_list.py b/swh/deposit/tests/api/test_deposit_list.py index bcdfe9b9..1d1ffb60 100644 --- a/swh/deposit/tests/api/test_deposit_list.py +++ b/swh/deposit/tests/api/test_deposit_list.py @@ -1,83 +1,75 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import reverse from rest_framework import status from swh.deposit.api.converters import convert_status_detail from swh.deposit.config import ( - DEPOSIT_STATUS_PARTIAL, PRIVATE_LIST_DEPOSITS, DEPOSIT_STATUS_DEPOSITED + DEPOSIT_STATUS_PARTIAL, + PRIVATE_LIST_DEPOSITS, + DEPOSIT_STATUS_DEPOSITED, ) -def test_deposit_list( - partial_deposit, deposited_deposit, authenticated_client): +def test_deposit_list(partial_deposit, deposited_deposit, authenticated_client): """Deposit list api should return the deposits """ status_detail = { - 'url': { - 'summary': 'At least one compatible url field. Failed', - 'fields': ['testurl'], + "url": { + "summary": "At least one compatible url field. Failed", + "fields": ["testurl"], }, - 'metadata': [ - { - 'summary': 'Mandatory fields missing', - 'fields': ['9', 10, 1.212], - }, + "metadata": [ + {"summary": "Mandatory fields missing", "fields": ["9", 10, 1.212],}, ], - 'archive': [ - { - 'summary': 'Invalid archive', - 'fields': ['3'], - }, - { - 'summary': 'Unsupported archive', - 'fields': [2], - } + "archive": [ + {"summary": "Invalid archive", "fields": ["3"],}, + {"summary": "Unsupported archive", "fields": [2],}, ], } partial_deposit.status_detail = status_detail partial_deposit.save() deposit_id = partial_deposit.id deposit_id2 = deposited_deposit.id # NOTE: does not work as documented # https://docs.djangoproject.com/en/1.11/ref/urlresolvers/#django.core.urlresolvers.reverse # noqa # url = reverse(PRIVATE_LIST_DEPOSITS, kwargs={'page_size': 1}) main_url = reverse(PRIVATE_LIST_DEPOSITS) - url = '%s?page_size=1' % main_url + url = "%s?page_size=1" % main_url response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK data = response.json() - assert data['count'] == 2 # 2 deposits - expected_next = '%s?page=2&page_size=1' % main_url - assert data['next'].endswith(expected_next) is True - assert data['previous'] is None - assert len(data['results']) == 1 # page of size 1 - deposit = data['results'][0] - assert deposit['id'] == deposit_id - assert deposit['status'] == DEPOSIT_STATUS_PARTIAL + assert data["count"] == 2 # 2 deposits + expected_next = "%s?page=2&page_size=1" % main_url + assert data["next"].endswith(expected_next) is True + assert data["previous"] is None + assert len(data["results"]) == 1 # page of size 1 + deposit = data["results"][0] + assert deposit["id"] == deposit_id + assert deposit["status"] == DEPOSIT_STATUS_PARTIAL expected_status_detail = convert_status_detail(status_detail) - assert deposit['status_detail'] == expected_status_detail + assert deposit["status_detail"] == expected_status_detail # then 2nd page response2 = authenticated_client.get(expected_next) assert response2.status_code == status.HTTP_200_OK data2 = response2.json() - assert data2['count'] == 2 # still 2 deposits - assert data2['next'] is None + assert data2["count"] == 2 # still 2 deposits + assert data2["next"] is None - expected_previous = '%s?page_size=1' % main_url - assert data2['previous'].endswith(expected_previous) is True - assert len(data2['results']) == 1 # page of size 1 + expected_previous = "%s?page_size=1" % main_url + assert data2["previous"].endswith(expected_previous) is True + assert len(data2["results"]) == 1 # page of size 1 - deposit2 = data2['results'][0] - assert deposit2['id'] == deposit_id2 - assert deposit2['status'] == DEPOSIT_STATUS_DEPOSITED + deposit2 = data2["results"][0] + assert deposit2["id"] == deposit_id2 + assert deposit2["status"] == DEPOSIT_STATUS_DEPOSITED diff --git a/swh/deposit/tests/api/test_deposit_multipart.py b/swh/deposit/tests/api/test_deposit_multipart.py index ac73597c..bb4f42d7 100644 --- a/swh/deposit/tests/api/test_deposit_multipart.py +++ b/swh/deposit/tests/api/test_deposit_multipart.py @@ -1,391 +1,400 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.files.uploadedfile import InMemoryUploadedFile from django.urls import reverse from io import BytesIO from rest_framework import status -from swh.deposit.config import ( - COL_IRI, DEPOSIT_STATUS_DEPOSITED -) +from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED from swh.deposit.models import Deposit, DepositRequest from swh.deposit.parsers import parse_xml from swh.deposit.tests.common import check_archive def test_post_deposit_multipart_without_slug_header_is_bad_request( - authenticated_client, deposit_collection, atom_dataset): + authenticated_client, deposit_collection, atom_dataset +): # given url = reverse(COL_IRI, args=[deposit_collection.name]) - archive_content = b'some content representing archive' + archive_content = b"some content representing archive" archive = InMemoryUploadedFile( BytesIO(archive_content), - field_name='archive0', - name='archive0', - content_type='application/zip', + field_name="archive0", + name="archive0", + content_type="application/zip", size=len(archive_content), - charset=None) + charset=None, + ) - data_atom_entry = atom_dataset['entry-data-deposit-binary'] + data_atom_entry = atom_dataset["entry-data-deposit-binary"] atom_entry = InMemoryUploadedFile( - BytesIO(data_atom_entry.encode('utf-8')), - field_name='atom0', - name='atom0', + BytesIO(data_atom_entry.encode("utf-8")), + field_name="atom0", + name="atom0", content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry), - charset='utf-8') + charset="utf-8", + ) # when response = authenticated_client.post( url, - format='multipart', - data={ - 'archive': archive, - 'atom_entry': atom_entry, - }, + format="multipart", + data={"archive": archive, "atom_entry": atom_entry,}, # + headers - HTTP_IN_PROGRESS='false') + HTTP_IN_PROGRESS="false", + ) - assert b'Missing SLUG header' in response.content + assert b"Missing SLUG header" in response.content assert response.status_code == status.HTTP_400_BAD_REQUEST def test_post_deposit_multipart_zip( - authenticated_client, deposit_collection, - atom_dataset, sample_archive): + authenticated_client, deposit_collection, atom_dataset, sample_archive +): """one multipart deposit (zip+xml) should be accepted """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) archive = InMemoryUploadedFile( - BytesIO(sample_archive['data']), - field_name=sample_archive['name'], - name=sample_archive['name'], - content_type='application/zip', - size=sample_archive['length'], - charset=None) - - data_atom_entry = atom_dataset['entry-data-deposit-binary'] + BytesIO(sample_archive["data"]), + field_name=sample_archive["name"], + name=sample_archive["name"], + content_type="application/zip", + size=sample_archive["length"], + charset=None, + ) + + data_atom_entry = atom_dataset["entry-data-deposit-binary"] atom_entry = InMemoryUploadedFile( - BytesIO(data_atom_entry.encode('utf-8')), - field_name='atom0', - name='atom0', + BytesIO(data_atom_entry.encode("utf-8")), + field_name="atom0", + name="atom0", content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry), - charset='utf-8') + charset="utf-8", + ) - external_id = 'external-id' + external_id = "external-id" # when response = authenticated_client.post( url, - format='multipart', - data={ - 'archive': archive, - 'atom_entry': atom_entry, - }, + format="multipart", + data={"archive": archive, "atom_entry": atom_entry,}, # + headers - HTTP_IN_PROGRESS='false', - HTTP_SLUG=external_id) + HTTP_IN_PROGRESS="false", + HTTP_SLUG=external_id, + ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_DEPOSITED assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swh_id is None deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert len(deposit_requests) == 2 for deposit_request in deposit_requests: assert deposit_request.deposit == deposit - if deposit_request.type == 'archive': - check_archive(sample_archive['name'], deposit_request.archive.name) + if deposit_request.type == "archive": + check_archive(sample_archive["name"], deposit_request.archive.name) assert deposit_request.metadata is None assert deposit_request.raw_metadata is None else: - assert deposit_request.metadata['id'] == \ - 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a' - assert deposit_request.raw_metadata == \ - data_atom_entry + assert ( + deposit_request.metadata["id"] + == "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a" + ) + assert deposit_request.raw_metadata == data_atom_entry def test_post_deposit_multipart_tar( - authenticated_client, deposit_collection, - atom_dataset, sample_archive): + authenticated_client, deposit_collection, atom_dataset, sample_archive +): """one multipart deposit (tar+xml) should be accepted """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) # from django.core.files import uploadedfile - data_atom_entry = atom_dataset['entry-data-deposit-binary'] + data_atom_entry = atom_dataset["entry-data-deposit-binary"] archive = InMemoryUploadedFile( - BytesIO(sample_archive['data']), - field_name=sample_archive['name'], - name=sample_archive['name'], - content_type='application/x-tar', - size=sample_archive['length'], - charset=None) + BytesIO(sample_archive["data"]), + field_name=sample_archive["name"], + name=sample_archive["name"], + content_type="application/x-tar", + size=sample_archive["length"], + charset=None, + ) atom_entry = InMemoryUploadedFile( - BytesIO(data_atom_entry.encode('utf-8')), - field_name='atom0', - name='atom0', + BytesIO(data_atom_entry.encode("utf-8")), + field_name="atom0", + name="atom0", content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry), - charset='utf-8') + charset="utf-8", + ) - external_id = 'external-id' + external_id = "external-id" # when response = authenticated_client.post( url, - format='multipart', - data={ - 'archive': archive, - 'atom_entry': atom_entry, - }, + format="multipart", + data={"archive": archive, "atom_entry": atom_entry,}, # + headers - HTTP_IN_PROGRESS='false', - HTTP_SLUG=external_id) + HTTP_IN_PROGRESS="false", + HTTP_SLUG=external_id, + ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_DEPOSITED assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swh_id is None deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert len(deposit_requests) == 2 for deposit_request in deposit_requests: assert deposit_request.deposit == deposit - if deposit_request.type == 'archive': - check_archive(sample_archive['name'], deposit_request.archive.name) + if deposit_request.type == "archive": + check_archive(sample_archive["name"], deposit_request.archive.name) assert deposit_request.metadata is None assert deposit_request.raw_metadata is None else: - assert deposit_request.metadata['id'] == \ - 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a' - assert deposit_request.raw_metadata == \ - data_atom_entry + assert ( + deposit_request.metadata["id"] + == "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a" + ) + assert deposit_request.raw_metadata == data_atom_entry def test_post_deposit_multipart_put_to_replace_metadata( - authenticated_client, deposit_collection, - atom_dataset, sample_archive): + authenticated_client, deposit_collection, atom_dataset, sample_archive +): """One multipart deposit followed by a metadata update should be accepted """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) - data_atom_entry = atom_dataset['entry-data-deposit-binary'] + data_atom_entry = atom_dataset["entry-data-deposit-binary"] archive = InMemoryUploadedFile( - BytesIO(sample_archive['data']), - field_name=sample_archive['name'], - name=sample_archive['name'], - content_type='application/zip', - size=sample_archive['length'], - charset=None) + BytesIO(sample_archive["data"]), + field_name=sample_archive["name"], + name=sample_archive["name"], + content_type="application/zip", + size=sample_archive["length"], + charset=None, + ) atom_entry = InMemoryUploadedFile( - BytesIO(data_atom_entry.encode('utf-8')), - field_name='atom0', - name='atom0', + BytesIO(data_atom_entry.encode("utf-8")), + field_name="atom0", + name="atom0", content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry), - charset='utf-8') + charset="utf-8", + ) - external_id = 'external-id' + external_id = "external-id" # when response = authenticated_client.post( url, - format='multipart', - data={ - 'archive': archive, - 'atom_entry': atom_entry, - }, + format="multipart", + data={"archive": archive, "atom_entry": atom_entry,}, # + headers - HTTP_IN_PROGRESS='true', - HTTP_SLUG=external_id) + HTTP_IN_PROGRESS="true", + HTTP_SLUG=external_id, + ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) - assert deposit.status == 'partial' + assert deposit.status == "partial" assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swh_id is None deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert len(deposit_requests) == 2 for deposit_request in deposit_requests: assert deposit_request.deposit == deposit - if deposit_request.type == 'archive': - check_archive(sample_archive['name'], deposit_request.archive.name) + if deposit_request.type == "archive": + check_archive(sample_archive["name"], deposit_request.archive.name) else: - assert deposit_request.metadata['id'] == \ - 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a' - assert deposit_request.raw_metadata == \ - data_atom_entry + assert ( + deposit_request.metadata["id"] + == "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a" + ) + assert deposit_request.raw_metadata == data_atom_entry - replace_metadata_uri = response._headers['location'][1] + replace_metadata_uri = response._headers["location"][1] response = authenticated_client.put( replace_metadata_uri, - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data-deposit-binary'], - HTTP_IN_PROGRESS='false') + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data-deposit-binary"], + HTTP_IN_PROGRESS="false", + ) assert response.status_code == status.HTTP_204_NO_CONTENT # deposit_id did not change deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_DEPOSITED assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swh_id is None deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert len(deposit_requests) == 2 for deposit_request in deposit_requests: assert deposit_request.deposit == deposit - if deposit_request.type == 'archive': - check_archive(sample_archive['name'], deposit_request.archive.name) + if deposit_request.type == "archive": + check_archive(sample_archive["name"], deposit_request.archive.name) else: - assert deposit_request.metadata['id'] == \ - 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a' - assert deposit_request.raw_metadata == \ - atom_dataset['entry-data-deposit-binary'] + assert ( + deposit_request.metadata["id"] + == "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a" + ) + assert ( + deposit_request.raw_metadata + == atom_dataset["entry-data-deposit-binary"] + ) + # FAILURE scenarios def test_post_deposit_multipart_only_archive_and_atom_entry( - authenticated_client, deposit_collection): + authenticated_client, deposit_collection +): """Multipart deposit only accepts one archive and one atom+xml""" # given url = reverse(COL_IRI, args=[deposit_collection.name]) - archive_content = b'some content representing archive' + archive_content = b"some content representing archive" archive = InMemoryUploadedFile( BytesIO(archive_content), - field_name='archive0', - name='archive0', - content_type='application/x-tar', + field_name="archive0", + name="archive0", + content_type="application/x-tar", size=len(archive_content), - charset=None) + charset=None, + ) other_archive_content = b"some-other-content" other_archive = InMemoryUploadedFile( BytesIO(other_archive_content), - field_name='atom0', - name='atom0', - content_type='application/x-tar', + field_name="atom0", + name="atom0", + content_type="application/x-tar", size=len(other_archive_content), - charset='utf-8') + charset="utf-8", + ) # when response = authenticated_client.post( url, - format='multipart', - data={ - 'archive': archive, - 'atom_entry': other_archive, - }, + format="multipart", + data={"archive": archive, "atom_entry": other_archive,}, # + headers - HTTP_IN_PROGRESS='false', - HTTP_SLUG='external-id') + HTTP_IN_PROGRESS="false", + HTTP_SLUG="external-id", + ) # then assert response.status_code == status.HTTP_415_UNSUPPORTED_MEDIA_TYPE - assert 'Only 1 application/zip (or application/x-tar) archive' in \ - response.content.decode('utf-8') + assert ( + "Only 1 application/zip (or application/x-tar) archive" + in response.content.decode("utf-8") + ) # when archive.seek(0) response = authenticated_client.post( url, - format='multipart', - data={ - 'archive': archive, - }, + format="multipart", + data={"archive": archive,}, # + headers - HTTP_IN_PROGRESS='false', - HTTP_SLUG='external-id') + HTTP_IN_PROGRESS="false", + HTTP_SLUG="external-id", + ) # then assert response.status_code == status.HTTP_415_UNSUPPORTED_MEDIA_TYPE assert ( - 'You must provide both 1 application/zip (or ' - 'application/x-tar) and 1 atom+xml entry for ' - 'multipart deposit' in response.content.decode('utf-8') + "You must provide both 1 application/zip (or " + "application/x-tar) and 1 atom+xml entry for " + "multipart deposit" in response.content.decode("utf-8") ) is True def test_post_deposit_multipart_400_when_badly_formatted_xml( - authenticated_client, deposit_collection, - sample_archive, atom_dataset): + authenticated_client, deposit_collection, sample_archive, atom_dataset +): # given url = reverse(COL_IRI, args=[deposit_collection.name]) - archive_content = sample_archive['data'] + archive_content = sample_archive["data"] archive = InMemoryUploadedFile( BytesIO(archive_content), - field_name=sample_archive['name'], - name=sample_archive['name'], - content_type='application/zip', + field_name=sample_archive["name"], + name=sample_archive["name"], + content_type="application/zip", size=len(archive_content), - charset=None) + charset=None, + ) - data_atom_entry_ko = atom_dataset['entry-data-ko'] + data_atom_entry_ko = atom_dataset["entry-data-ko"] atom_entry = InMemoryUploadedFile( - BytesIO(data_atom_entry_ko.encode('utf-8')), - field_name='atom0', - name='atom0', + BytesIO(data_atom_entry_ko.encode("utf-8")), + field_name="atom0", + name="atom0", content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry_ko), - charset='utf-8') + charset="utf-8", + ) # when response = authenticated_client.post( url, - format='multipart', - data={ - 'archive': archive, - 'atom_entry': atom_entry, - }, + format="multipart", + data={"archive": archive, "atom_entry": atom_entry,}, # + headers - HTTP_IN_PROGRESS='false', - HTTP_SLUG='external-id', + HTTP_IN_PROGRESS="false", + HTTP_SLUG="external-id", ) - assert b'Malformed xml metadata' in response.content + assert b"Malformed xml metadata" in response.content assert response.status_code == status.HTTP_400_BAD_REQUEST diff --git a/swh/deposit/tests/api/test_deposit_private_check.py b/swh/deposit/tests/api/test_deposit_private_check.py index a6bab004..8982f232 100644 --- a/swh/deposit/tests/api/test_deposit_private_check.py +++ b/swh/deposit/tests/api/test_deposit_private_check.py @@ -1,266 +1,283 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import reverse import pytest from rest_framework import status from swh.deposit.config import ( - DEPOSIT_STATUS_VERIFIED, PRIVATE_CHECK_DEPOSIT, - DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED, COL_IRI + DEPOSIT_STATUS_VERIFIED, + PRIVATE_CHECK_DEPOSIT, + DEPOSIT_STATUS_DEPOSITED, + DEPOSIT_STATUS_REJECTED, + COL_IRI, ) from swh.deposit.api.private.deposit_check import ( - MANDATORY_ARCHIVE_INVALID, MANDATORY_FIELDS_MISSING, - MANDATORY_ARCHIVE_UNSUPPORTED, ALTERNATE_FIELDS_MISSING, - MANDATORY_ARCHIVE_MISSING + MANDATORY_ARCHIVE_INVALID, + MANDATORY_FIELDS_MISSING, + MANDATORY_ARCHIVE_UNSUPPORTED, + ALTERNATE_FIELDS_MISSING, + MANDATORY_ARCHIVE_MISSING, ) from swh.deposit.models import Deposit from swh.deposit.parsers import parse_xml from swh.deposit.tests.common import ( - create_arborescence_archive, create_archive_with_archive + create_arborescence_archive, + create_archive_with_archive, ) -PRIVATE_CHECK_DEPOSIT_NC = PRIVATE_CHECK_DEPOSIT + '-nc' +PRIVATE_CHECK_DEPOSIT_NC = PRIVATE_CHECK_DEPOSIT + "-nc" def private_check_url_endpoints(collection, deposit): """There are 2 endpoints to check (one with collection, one without)""" return [ reverse(PRIVATE_CHECK_DEPOSIT, args=[collection.name, deposit.id]), - reverse(PRIVATE_CHECK_DEPOSIT_NC, args=[deposit.id]) + reverse(PRIVATE_CHECK_DEPOSIT_NC, args=[deposit.id]), ] -@pytest.mark.parametrize( - "extension", ['zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz']) +@pytest.mark.parametrize("extension", ["zip", "tar", "tar.gz", "tar.bz2", "tar.xz"]) def test_deposit_ok( - authenticated_client, deposit_collection, ready_deposit_ok, extension): + authenticated_client, deposit_collection, ready_deposit_ok, extension +): """Proper deposit should succeed the checks (-> status ready) """ deposit = ready_deposit_ok for url in private_check_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK data = response.json() - assert data['status'] == DEPOSIT_STATUS_VERIFIED + assert data["status"] == DEPOSIT_STATUS_VERIFIED deposit = Deposit.objects.get(pk=deposit.id) assert deposit.status == DEPOSIT_STATUS_VERIFIED deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() -@pytest.mark.parametrize( - "extension", ['zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz']) + +@pytest.mark.parametrize("extension", ["zip", "tar", "tar.gz", "tar.bz2", "tar.xz"]) def test_deposit_invalid_tarball( - tmp_path, authenticated_client, deposit_collection, extension): + tmp_path, authenticated_client, deposit_collection, extension +): """Deposit with tarball (of 1 tarball) should fail the checks: rejected """ deposit = create_deposit_archive_with_archive( - tmp_path, extension, - authenticated_client, - deposit_collection.name) + tmp_path, extension, authenticated_client, deposit_collection.name + ) for url in private_check_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK data = response.json() - assert data['status'] == DEPOSIT_STATUS_REJECTED - details = data['details'] + assert data["status"] == DEPOSIT_STATUS_REJECTED + details = data["details"] # archive checks failure - assert len(details['archive']) == 1 - assert details['archive'][0]['summary'] == \ - MANDATORY_ARCHIVE_INVALID + assert len(details["archive"]) == 1 + assert details["archive"][0]["summary"] == MANDATORY_ARCHIVE_INVALID deposit = Deposit.objects.get(pk=deposit.id) assert deposit.status == DEPOSIT_STATUS_REJECTED def test_deposit_ko_missing_tarball( - authenticated_client, deposit_collection, ready_deposit_only_metadata): + authenticated_client, deposit_collection, ready_deposit_only_metadata +): """Deposit without archive should fail the checks: rejected """ deposit = ready_deposit_only_metadata assert deposit.status == DEPOSIT_STATUS_DEPOSITED for url in private_check_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK data = response.json() - assert data['status'] == DEPOSIT_STATUS_REJECTED - details = data['details'] + assert data["status"] == DEPOSIT_STATUS_REJECTED + details = data["details"] # archive checks failure - assert len(details['archive']) == 1 - assert details['archive'][0]['summary'] == MANDATORY_ARCHIVE_MISSING + assert len(details["archive"]) == 1 + assert details["archive"][0]["summary"] == MANDATORY_ARCHIVE_MISSING deposit = Deposit.objects.get(pk=deposit.id) assert deposit.status == DEPOSIT_STATUS_REJECTED deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() def test_deposit_ko_unsupported_tarball( - tmp_path, authenticated_client, deposit_collection, - ready_deposit_invalid_archive): + tmp_path, authenticated_client, deposit_collection, ready_deposit_invalid_archive +): """Deposit with an unsupported tarball should fail the checks: rejected """ deposit = ready_deposit_invalid_archive assert DEPOSIT_STATUS_DEPOSITED == deposit.status for url in private_check_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK data = response.json() - assert data['status'] == DEPOSIT_STATUS_REJECTED - details = data['details'] + assert data["status"] == DEPOSIT_STATUS_REJECTED + details = data["details"] # archive checks failure - assert len(details['archive']) == 1 - assert details['archive'][0]['summary'] == \ - MANDATORY_ARCHIVE_UNSUPPORTED + assert len(details["archive"]) == 1 + assert details["archive"][0]["summary"] == MANDATORY_ARCHIVE_UNSUPPORTED # metadata check failure - assert len(details['metadata']) == 2 - mandatory = details['metadata'][0] - assert mandatory['summary'] == MANDATORY_FIELDS_MISSING - assert set(mandatory['fields']) == set(['author']) - alternate = details['metadata'][1] - assert alternate['summary'] == ALTERNATE_FIELDS_MISSING - assert alternate['fields'] == ['name or title'] + assert len(details["metadata"]) == 2 + mandatory = details["metadata"][0] + assert mandatory["summary"] == MANDATORY_FIELDS_MISSING + assert set(mandatory["fields"]) == set(["author"]) + alternate = details["metadata"][1] + assert alternate["summary"] == ALTERNATE_FIELDS_MISSING + assert alternate["fields"] == ["name or title"] deposit = Deposit.objects.get(pk=deposit.id) assert deposit.status == DEPOSIT_STATUS_REJECTED deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() def test_check_deposit_metadata_ok( - authenticated_client, deposit_collection, ready_deposit_ok): + authenticated_client, deposit_collection, ready_deposit_ok +): """Proper deposit should succeed the checks (-> status ready) with all **MUST** metadata using the codemeta metadata test set """ deposit = ready_deposit_ok assert deposit.status == DEPOSIT_STATUS_DEPOSITED for url in private_check_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK data = response.json() - assert data['status'] == DEPOSIT_STATUS_VERIFIED + assert data["status"] == DEPOSIT_STATUS_VERIFIED deposit = Deposit.objects.get(pk=deposit.id) assert deposit.status == DEPOSIT_STATUS_VERIFIED deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() def test_check_metadata_ok(swh_checks_deposit): - actual_check, detail = swh_checks_deposit._check_metadata({ - 'url': 'something', - 'external_identifier': 'something-else', - 'name': 'foo', - 'author': 'someone', - }) + actual_check, detail = swh_checks_deposit._check_metadata( + { + "url": "something", + "external_identifier": "something-else", + "name": "foo", + "author": "someone", + } + ) assert actual_check is True assert detail is None def test_check_metadata_ok2(swh_checks_deposit): - actual_check, detail = swh_checks_deposit._check_metadata({ - 'url': 'something', - 'external_identifier': 'something-else', - 'title': 'bar', - 'author': 'someone', - }) + actual_check, detail = swh_checks_deposit._check_metadata( + { + "url": "something", + "external_identifier": "something-else", + "title": "bar", + "author": "someone", + } + ) assert actual_check is True assert detail is None def test_check_metadata_ko(swh_checks_deposit): """Missing optional field should be caught """ - actual_check, error_detail = swh_checks_deposit._check_metadata({ - 'url': 'something', - 'external_identifier': 'something-else', - 'author': 'someone', - }) + actual_check, error_detail = swh_checks_deposit._check_metadata( + { + "url": "something", + "external_identifier": "something-else", + "author": "someone", + } + ) expected_error = { - 'metadata': [{ - 'summary': 'Mandatory alternate fields are missing', - 'fields': ['name or title'], - }] + "metadata": [ + { + "summary": "Mandatory alternate fields are missing", + "fields": ["name or title"], + } + ] } assert actual_check is False assert error_detail == expected_error def test_check_metadata_ko2(swh_checks_deposit): """Missing mandatory fields should be caught """ - actual_check, error_detail = swh_checks_deposit._check_metadata({ - 'url': 'something', - 'external_identifier': 'something-else', - 'title': 'foobar', - }) + actual_check, error_detail = swh_checks_deposit._check_metadata( + { + "url": "something", + "external_identifier": "something-else", + "title": "foobar", + } + ) expected_error = { - 'metadata': [{ - 'summary': 'Mandatory fields are missing', - 'fields': ['author'], - }] + "metadata": [{"summary": "Mandatory fields are missing", "fields": ["author"],}] } assert actual_check is False assert error_detail == expected_error def create_deposit_archive_with_archive( - root_path, archive_extension, client, collection_name): + root_path, archive_extension, client, collection_name +): # we create the holding archive to a given extension archive = create_arborescence_archive( - root_path, 'archive1', 'file1', b'some content in file', - extension=archive_extension) + root_path, + "archive1", + "file1", + b"some content in file", + extension=archive_extension, + ) # now we create an archive holding the first created archive - invalid_archive = create_archive_with_archive( - root_path, 'invalid.tgz', archive) + invalid_archive = create_archive_with_archive(root_path, "invalid.tgz", archive) # we deposit it response = client.post( reverse(COL_IRI, args=[collection_name]), - content_type='application/x-tar', - data=invalid_archive['data'], - CONTENT_LENGTH=invalid_archive['length'], - HTTP_MD5SUM=invalid_archive['md5sum'], - HTTP_SLUG='external-id', + content_type="application/x-tar", + data=invalid_archive["data"], + CONTENT_LENGTH=invalid_archive["length"], + HTTP_MD5SUM=invalid_archive["md5sum"], + HTTP_SLUG="external-id", HTTP_IN_PROGRESS=False, - HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( - invalid_archive['name'], )) + HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (invalid_archive["name"],), + ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(response.content) - deposit_status = response_content['deposit_status'] + deposit_status = response_content["deposit_status"] assert deposit_status == DEPOSIT_STATUS_DEPOSITED - deposit_id = int(response_content['deposit_id']) + deposit_id = int(response_content["deposit_id"]) deposit = Deposit.objects.get(pk=deposit_id) assert DEPOSIT_STATUS_DEPOSITED == deposit.status return deposit diff --git a/swh/deposit/tests/api/test_deposit_private_read_archive.py b/swh/deposit/tests/api/test_deposit_private_read_archive.py index b0f86dfb..1724a2a9 100644 --- a/swh/deposit/tests/api/test_deposit_private_read_archive.py +++ b/swh/deposit/tests/api/test_deposit_private_read_archive.py @@ -1,86 +1,87 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import io import zipfile from django.urls import reverse from rest_framework import status from swh.deposit.config import PRIVATE_GET_RAW_CONTENT, EM_IRI from swh.deposit.tests.common import create_arborescence_archive -PRIVATE_GET_RAW_CONTENT_NC = PRIVATE_GET_RAW_CONTENT + '-nc' +PRIVATE_GET_RAW_CONTENT_NC = PRIVATE_GET_RAW_CONTENT + "-nc" def private_get_raw_url_endpoints(collection, deposit): """There are 2 endpoints to check (one with collection, one without)""" return [ reverse(PRIVATE_GET_RAW_CONTENT, args=[collection.name, deposit.id]), - reverse(PRIVATE_GET_RAW_CONTENT_NC, args=[deposit.id]) + reverse(PRIVATE_GET_RAW_CONTENT_NC, args=[deposit.id]), ] def test_access_to_existing_deposit_with_one_archive( - authenticated_client, deposit_collection, complete_deposit, - sample_archive): + authenticated_client, deposit_collection, complete_deposit, sample_archive +): """Access to deposit should stream a 200 response with its raw content """ deposit = complete_deposit for url in private_get_raw_url_endpoints(deposit_collection, deposit): r = authenticated_client.get(url) assert r.status_code == status.HTTP_200_OK - assert r._headers['content-type'][1] == 'application/zip' + assert r._headers["content-type"][1] == "application/zip" # read the stream - data = b''.join(r.streaming_content) + data = b"".join(r.streaming_content) # extract the file from the zip zfile = zipfile.ZipFile(io.BytesIO(data)) - assert zfile.namelist() == ['file1'] - assert zfile.open('file1').read() == b'some content in file' + assert zfile.namelist() == ["file1"] + assert zfile.open("file1").read() == b"some content in file" def test_access_to_existing_deposit_with_multiple_archives( - tmp_path, authenticated_client, deposit_collection, partial_deposit, - sample_archive): + tmp_path, authenticated_client, deposit_collection, partial_deposit, sample_archive +): """Access to deposit should stream a 200 response with its raw contents """ deposit = partial_deposit archive2 = create_arborescence_archive( - tmp_path, 'archive2', 'file2', b'some other content in file') + tmp_path, "archive2", "file2", b"some other content in file" + ) # Add a second archive to deposit update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.post( update_uri, - content_type='application/zip', # as zip - data=archive2['data'], + content_type="application/zip", # as zip + data=archive2["data"], # + headers - CONTENT_LENGTH=archive2['length'], + CONTENT_LENGTH=archive2["length"], HTTP_SLUG=deposit.external_id, - HTTP_CONTENT_MD5=archive2['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false', - HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( - archive2['name'], )) + HTTP_CONTENT_MD5=archive2["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],), + ) assert response.status_code == status.HTTP_201_CREATED for url in private_get_raw_url_endpoints(deposit_collection, deposit): r = authenticated_client.get(url) assert r.status_code == status.HTTP_200_OK - assert r._headers['content-type'][1] == 'application/zip' + assert r._headers["content-type"][1] == "application/zip" # read the stream - data = b''.join(r.streaming_content) + data = b"".join(r.streaming_content) # extract the file from the zip zfile = zipfile.ZipFile(io.BytesIO(data)) - assert set(zfile.namelist()) == {'file1', 'file2'} - assert zfile.open('file1').read() == b'some content in file' - assert zfile.open('file2').read() == b'some other content in file' + assert set(zfile.namelist()) == {"file1", "file2"} + assert zfile.open("file1").read() == b"some content in file" + assert zfile.open("file2").read() == b"some other content in file" diff --git a/swh/deposit/tests/api/test_deposit_private_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py index 3738eebf..75be1ffc 100644 --- a/swh/deposit/tests/api/test_deposit_private_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py @@ -1,601 +1,588 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import reverse from rest_framework import status from swh.deposit.models import Deposit -from swh.deposit.config import ( - PRIVATE_GET_DEPOSIT_METADATA, SWH_PERSON, EDIT_SE_IRI -) +from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA, SWH_PERSON, EDIT_SE_IRI -PRIVATE_GET_DEPOSIT_METADATA_NC = PRIVATE_GET_DEPOSIT_METADATA + '-nc' +PRIVATE_GET_DEPOSIT_METADATA_NC = PRIVATE_GET_DEPOSIT_METADATA + "-nc" def private_get_raw_url_endpoints(collection, deposit): """There are 2 endpoints to check (one with collection, one without)""" deposit_id = deposit if isinstance(deposit, int) else deposit.id return [ - reverse(PRIVATE_GET_DEPOSIT_METADATA, - args=[collection.name, deposit_id]), - reverse(PRIVATE_GET_DEPOSIT_METADATA_NC, - args=[deposit_id]) + reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[collection.name, deposit_id]), + reverse(PRIVATE_GET_DEPOSIT_METADATA_NC, args=[deposit_id]), ] def update_deposit(authenticated_client, collection, deposit, atom_dataset): - for atom_data in ['entry-data2', 'entry-data3']: + for atom_data in ["entry-data2", "entry-data3"]: update_deposit_with_metadata( authenticated_client, collection, deposit, atom_dataset[atom_data] ) return deposit -def update_deposit_with_metadata(authenticated_client, collection, deposit, - metadata): +def update_deposit_with_metadata(authenticated_client, collection, deposit, metadata): # update deposit's metadata response = authenticated_client.post( reverse(EDIT_SE_IRI, args=[collection.name, deposit.id]), - content_type='application/atom+xml;type=entry', + content_type="application/atom+xml;type=entry", data=metadata, HTTP_SLUG=deposit.external_id, - HTTP_IN_PROGRESS=True) + HTTP_IN_PROGRESS=True, + ) assert response.status_code == status.HTTP_201_CREATED return deposit def test_read_metadata( - authenticated_client, deposit_collection, partial_deposit, - atom_dataset): + authenticated_client, deposit_collection, partial_deposit, atom_dataset +): """Private metadata read api to existing deposit should return metadata """ deposit = partial_deposit - deposit.external_id = 'some-external-id' + deposit.external_id = "some-external-id" deposit.save() - deposit = update_deposit(authenticated_client, deposit_collection, deposit, - atom_dataset) + deposit = update_deposit( + authenticated_client, deposit_collection, deposit, atom_dataset + ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK - assert response._headers['content-type'][1] == 'application/json' + assert response._headers["content-type"][1] == "application/json" data = response.json() expected_meta = { - 'branch_name': 'master', - 'origin': { - 'type': 'deposit', - 'url': 'https://hal-test.archives-ouvertes.fr/some-external-id' + "branch_name": "master", + "origin": { + "type": "deposit", + "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, - 'origin_metadata': { - 'metadata': { - '@xmlns': ['http://www.w3.org/2005/Atom'], - 'author': [ - 'some awesome author', - 'another one', - 'no one' - ], - 'codemeta:dateCreated': '2017-10-07T15:17:08Z', - 'external_identifier': 'some-external-id', - 'url': 'https://hal-test.archives-ouvertes.fr/some-external-id' # noqa + "origin_metadata": { + "metadata": { + "@xmlns": ["http://www.w3.org/2005/Atom"], + "author": ["some awesome author", "another one", "no one"], + "codemeta:dateCreated": "2017-10-07T15:17:08Z", + "external_identifier": "some-external-id", + "url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa }, - 'provider': { - 'metadata': {}, - 'provider_name': '', - 'provider_type': 'deposit_client', - 'provider_url': 'https://hal-test.archives-ouvertes.fr/' + "provider": { + "metadata": {}, + "provider_name": "", + "provider_type": "deposit_client", + "provider_url": "https://hal-test.archives-ouvertes.fr/", + }, + "tool": { + "configuration": {"sword_version": "2"}, + "name": "swh-deposit", + "version": "0.0.1", }, - 'tool': { - 'configuration': {'sword_version': '2'}, - 'name': 'swh-deposit', - 'version': '0.0.1' - } }, - 'revision': { - 'author': SWH_PERSON, - 'committer': SWH_PERSON, - 'committer_date': { - 'negative_utc': False, - 'offset': 0, - 'timestamp': { - 'microseconds': 0, - 'seconds': 1507389428 - } + "revision": { + "author": SWH_PERSON, + "committer": SWH_PERSON, + "committer_date": { + "negative_utc": False, + "offset": 0, + "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, - 'date': { - 'negative_utc': False, - 'offset': 0, - 'timestamp': {'microseconds': 0, 'seconds': 1507389428} + "date": { + "negative_utc": False, + "offset": 0, + "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, - 'message': 'test: Deposit %s in collection test' % deposit.id, - 'metadata': { - '@xmlns': ['http://www.w3.org/2005/Atom'], - 'author': ['some awesome author', - 'another one', - 'no one'], - 'codemeta:dateCreated': '2017-10-07T15:17:08Z', - 'external_identifier': 'some-external-id', - 'url': 'https://hal-test.archives-ouvertes.fr/some-external-id' # noqa + "message": "test: Deposit %s in collection test" % deposit.id, + "metadata": { + "@xmlns": ["http://www.w3.org/2005/Atom"], + "author": ["some awesome author", "another one", "no one"], + "codemeta:dateCreated": "2017-10-07T15:17:08Z", + "external_identifier": "some-external-id", + "url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa }, - 'synthetic': True, - 'type': 'tar' - } + "synthetic": True, + "type": "tar", + }, } assert data == expected_meta def test_read_metadata_revision_with_parent( - authenticated_client, deposit_collection, partial_deposit, - atom_dataset): + authenticated_client, deposit_collection, partial_deposit, atom_dataset +): """Private read metadata to a deposit (with parent) returns metadata """ deposit = partial_deposit - deposit.external_id = 'some-external-id' + deposit.external_id = "some-external-id" deposit.save() - deposit = update_deposit(authenticated_client, deposit_collection, deposit, - atom_dataset) - rev_id = 'da78a9d4cf1d5d29873693fd496142e3a18c20fa' - swh_id = 'swh:1:rev:%s' % rev_id - fake_parent = Deposit(swh_id=swh_id, - client=deposit.client, collection=deposit.collection) + deposit = update_deposit( + authenticated_client, deposit_collection, deposit, atom_dataset + ) + rev_id = "da78a9d4cf1d5d29873693fd496142e3a18c20fa" + swh_id = "swh:1:rev:%s" % rev_id + fake_parent = Deposit( + swh_id=swh_id, client=deposit.client, collection=deposit.collection + ) fake_parent.save() deposit.parent = fake_parent deposit.save() for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK - assert response._headers['content-type'][1] == 'application/json' + assert response._headers["content-type"][1] == "application/json" data = response.json() expected_meta = { - 'branch_name': 'master', - 'origin': { - 'type': 'deposit', - 'url': 'https://hal-test.archives-ouvertes.fr/some-external-id' + "branch_name": "master", + "origin": { + "type": "deposit", + "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, - 'origin_metadata': { - 'metadata': { - '@xmlns': ['http://www.w3.org/2005/Atom'], - 'author': [ - 'some awesome author', - 'another one', - 'no one' - ], - 'codemeta:dateCreated': '2017-10-07T15:17:08Z', - 'external_identifier': 'some-external-id', - 'url': 'https://hal-test.archives-ouvertes.fr/some-external-id' # noqa + "origin_metadata": { + "metadata": { + "@xmlns": ["http://www.w3.org/2005/Atom"], + "author": ["some awesome author", "another one", "no one"], + "codemeta:dateCreated": "2017-10-07T15:17:08Z", + "external_identifier": "some-external-id", + "url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa + }, + "provider": { + "metadata": {}, + "provider_name": "", + "provider_type": "deposit_client", + "provider_url": "https://hal-test.archives-ouvertes.fr/", }, - 'provider': { - 'metadata': {}, - 'provider_name': '', - 'provider_type': 'deposit_client', - 'provider_url': 'https://hal-test.archives-ouvertes.fr/' + "tool": { + "configuration": {"sword_version": "2"}, + "name": "swh-deposit", + "version": "0.0.1", }, - 'tool': { - 'configuration': {'sword_version': '2'}, - 'name': 'swh-deposit', - 'version': '0.0.1' - } }, - 'revision': { - 'author': SWH_PERSON, - 'committer': SWH_PERSON, - 'committer_date': { - 'negative_utc': False, - 'offset': 0, - 'timestamp': { - 'microseconds': 0, - 'seconds': 1507389428 - } + "revision": { + "author": SWH_PERSON, + "committer": SWH_PERSON, + "committer_date": { + "negative_utc": False, + "offset": 0, + "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, - 'date': { - 'negative_utc': False, - 'offset': 0, - 'timestamp': {'microseconds': 0, 'seconds': 1507389428} + "date": { + "negative_utc": False, + "offset": 0, + "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, - 'message': 'test: Deposit %s in collection test' % deposit.id, - 'metadata': { - '@xmlns': ['http://www.w3.org/2005/Atom'], - 'author': ['some awesome author', - 'another one', - 'no one'], - 'codemeta:dateCreated': '2017-10-07T15:17:08Z', - 'external_identifier': 'some-external-id', - 'url': 'https://hal-test.archives-ouvertes.fr/some-external-id' # noqa + "message": "test: Deposit %s in collection test" % deposit.id, + "metadata": { + "@xmlns": ["http://www.w3.org/2005/Atom"], + "author": ["some awesome author", "another one", "no one"], + "codemeta:dateCreated": "2017-10-07T15:17:08Z", + "external_identifier": "some-external-id", + "url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa }, - 'synthetic': True, - 'type': 'tar', - 'parents': [rev_id], - } + "synthetic": True, + "type": "tar", + "parents": [rev_id], + }, } assert data == expected_meta def test_read_metadata_3( - authenticated_client, deposit_collection, partial_deposit, - atom_dataset): + authenticated_client, deposit_collection, partial_deposit, atom_dataset +): """date(Created|Published) provided, uses author/committer date """ deposit = partial_deposit - deposit.external_id = 'hal-01243065' + deposit.external_id = "hal-01243065" deposit.save() deposit = update_deposit( - authenticated_client, deposit_collection, deposit, - atom_dataset) + authenticated_client, deposit_collection, deposit, atom_dataset + ) # add metadata to the deposit with datePublished and dateCreated - codemeta_entry_data = atom_dataset['metadata'] % """ + codemeta_entry_data = ( + atom_dataset["metadata"] + % """ 2015-04-06T17:08:47+02:00 2017-05-03T16:08:47+02:00 """ + ) update_deposit_with_metadata( - authenticated_client, deposit_collection, deposit, - codemeta_entry_data + authenticated_client, deposit_collection, deposit, codemeta_entry_data ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK - assert response._headers['content-type'][1] == 'application/json' + assert response._headers["content-type"][1] == "application/json" data = response.json() metadata = { - '@xmlns': ['http://www.w3.org/2005/Atom'], - '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', - 'author': [ - 'some awesome author', - 'another one', - 'no one', - { - 'email': 'hal@ccsd.cnrs.fr', - 'name': 'HAL' - } + "@xmlns": ["http://www.w3.org/2005/Atom"], + "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0", + "author": [ + "some awesome author", + "another one", + "no one", + {"email": "hal@ccsd.cnrs.fr", "name": "HAL"}, ], - 'client': 'hal', - 'codemeta:applicationCategory': 'test', - 'codemeta:author': { - 'codemeta:name': 'Morane Gruenpeter' - }, - 'codemeta:dateCreated': ['2017-10-07T15:17:08Z', - '2015-04-06T17:08:47+02:00'], - 'codemeta:datePublished': '2017-05-03T16:08:47+02:00', - 'codemeta:description': 'this is the description', - 'codemeta:developmentStatus': 'stable', - 'codemeta:keywords': 'DSP programming', - 'codemeta:license': [ - {'codemeta:name': 'GNU General Public License v3.0 only'}, - {'codemeta:name': 'CeCILL ' - 'Free ' - 'Software ' - 'License ' - 'Agreement ' - 'v1.1'}], - 'codemeta:programmingLanguage': [ - 'php', - 'python', - 'C' + "client": "hal", + "codemeta:applicationCategory": "test", + "codemeta:author": {"codemeta:name": "Morane Gruenpeter"}, + "codemeta:dateCreated": [ + "2017-10-07T15:17:08Z", + "2015-04-06T17:08:47+02:00", ], - 'codemeta:runtimePlatform': 'phpstorm', - 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa - 'codemeta:version': '1', - 'external_identifier': [ - 'some-external-id', - 'hal-01243065' + "codemeta:datePublished": "2017-05-03T16:08:47+02:00", + "codemeta:description": "this is the description", + "codemeta:developmentStatus": "stable", + "codemeta:keywords": "DSP programming", + "codemeta:license": [ + {"codemeta:name": "GNU General Public License v3.0 only"}, + { + "codemeta:name": "CeCILL " + "Free " + "Software " + "License " + "Agreement " + "v1.1" + }, ], - 'id': 'hal-01243065', - 'title': 'Composing a Web of Audio ' - 'Applications', - 'url': 'https://hal-test.archives-ouvertes.fr/some-external-id' + "codemeta:programmingLanguage": ["php", "python", "C"], + "codemeta:runtimePlatform": "phpstorm", + "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", # noqa + "codemeta:version": "1", + "external_identifier": ["some-external-id", "hal-01243065"], + "id": "hal-01243065", + "title": "Composing a Web of Audio " "Applications", + "url": "https://hal-test.archives-ouvertes.fr/some-external-id", } expected_meta = { - 'branch_name': 'master', - 'origin': { - 'type': 'deposit', - 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065' + "branch_name": "master", + "origin": { + "type": "deposit", + "url": "https://hal-test.archives-ouvertes.fr/hal-01243065", }, - 'origin_metadata': { - 'metadata': metadata, - 'provider': { - 'metadata': {}, - 'provider_name': '', - 'provider_type': 'deposit_client', - 'provider_url': 'https://hal-test.archives-ouvertes.fr/' + "origin_metadata": { + "metadata": metadata, + "provider": { + "metadata": {}, + "provider_name": "", + "provider_type": "deposit_client", + "provider_url": "https://hal-test.archives-ouvertes.fr/", + }, + "tool": { + "configuration": {"sword_version": "2"}, + "name": "swh-deposit", + "version": "0.0.1", }, - 'tool': { - 'configuration': {'sword_version': '2'}, - 'name': 'swh-deposit', - 'version': '0.0.1' - } }, - 'revision': { - 'author': SWH_PERSON, - 'committer': SWH_PERSON, - 'committer_date': {'negative_utc': False, - 'offset': 120, - 'timestamp': {'microseconds': 0, - 'seconds': 1493820527}}, - 'date': { - 'negative_utc': False, - 'offset': 0, - 'timestamp': {'microseconds': 0, 'seconds': 1507389428} + "revision": { + "author": SWH_PERSON, + "committer": SWH_PERSON, + "committer_date": { + "negative_utc": False, + "offset": 120, + "timestamp": {"microseconds": 0, "seconds": 1493820527}, + }, + "date": { + "negative_utc": False, + "offset": 0, + "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, - 'message': '%s: Deposit %s in collection %s' % ( - deposit_collection.name, - deposit.id, - deposit_collection.name - ), - 'metadata': metadata, - 'synthetic': True, - 'type': 'tar' - } + "message": "%s: Deposit %s in collection %s" + % (deposit_collection.name, deposit.id, deposit_collection.name), + "metadata": metadata, + "synthetic": True, + "type": "tar", + }, } assert data == expected_meta def test_read_metadata_4( - authenticated_client, deposit_collection, atom_dataset, - partial_deposit): + authenticated_client, deposit_collection, atom_dataset, partial_deposit +): """dateCreated/datePublished not provided, revision uses complete_date """ deposit = partial_deposit - codemeta_entry_data = atom_dataset['metadata'] % '' + codemeta_entry_data = atom_dataset["metadata"] % "" deposit = update_deposit_with_metadata( - authenticated_client, deposit_collection, deposit, - codemeta_entry_data) + authenticated_client, deposit_collection, deposit, codemeta_entry_data + ) # will use the deposit completed date as fallback date - deposit.complete_date = '2016-04-06' + deposit.complete_date = "2016-04-06" deposit.save() for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK - assert response._headers['content-type'][1] == 'application/json' + assert response._headers["content-type"][1] == "application/json" data = response.json() metadata = { - '@xmlns': 'http://www.w3.org/2005/Atom', - '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', - 'author': {'email': 'hal@ccsd.cnrs.fr', - 'name': 'HAL'}, - 'client': 'hal', - 'codemeta:applicationCategory': 'test', - 'codemeta:author': {'codemeta:name': 'Morane ' - 'Gruenpeter'}, - 'codemeta:description': 'this is the ' - 'description', - 'codemeta:developmentStatus': 'stable', - 'codemeta:keywords': 'DSP programming', - 'codemeta:license': [{'codemeta:name': 'GNU ' - 'General ' - 'Public ' - 'License ' - 'v3.0 ' - 'only'}, - {'codemeta:name': 'CeCILL ' - 'Free ' - 'Software ' - 'License ' - 'Agreement ' - 'v1.1'}], - 'codemeta:programmingLanguage': ['php', - 'python', - 'C'], - 'codemeta:runtimePlatform': 'phpstorm', - 'codemeta:url': - 'https://hal-test.archives-ouvertes.fr/hal-01243065', - 'codemeta:version': '1', - 'external_identifier': 'hal-01243065', - 'id': 'hal-01243065', - 'title': 'Composing a Web of Audio ' - 'Applications' + "@xmlns": "http://www.w3.org/2005/Atom", + "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0", + "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"}, + "client": "hal", + "codemeta:applicationCategory": "test", + "codemeta:author": {"codemeta:name": "Morane " "Gruenpeter"}, + "codemeta:description": "this is the " "description", + "codemeta:developmentStatus": "stable", + "codemeta:keywords": "DSP programming", + "codemeta:license": [ + { + "codemeta:name": "GNU " + "General " + "Public " + "License " + "v3.0 " + "only" + }, + { + "codemeta:name": "CeCILL " + "Free " + "Software " + "License " + "Agreement " + "v1.1" + }, + ], + "codemeta:programmingLanguage": ["php", "python", "C"], + "codemeta:runtimePlatform": "phpstorm", + "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", + "codemeta:version": "1", + "external_identifier": "hal-01243065", + "id": "hal-01243065", + "title": "Composing a Web of Audio " "Applications", } expected_origin = { - 'type': 'deposit', - 'url': 'https://hal-test.archives-ouvertes.fr/%s' % ( - deposit.external_id) + "type": "deposit", + "url": "https://hal-test.archives-ouvertes.fr/%s" % (deposit.external_id), } expected_origin_metadata = { - 'metadata': metadata, - 'provider': { - 'metadata': {}, - 'provider_name': '', - 'provider_type': 'deposit_client', - 'provider_url': 'https://hal-test.archives-ouvertes.fr/' + "metadata": metadata, + "provider": { + "metadata": {}, + "provider_name": "", + "provider_type": "deposit_client", + "provider_url": "https://hal-test.archives-ouvertes.fr/", + }, + "tool": { + "configuration": {"sword_version": "2"}, + "name": "swh-deposit", + "version": "0.0.1", }, - 'tool': { - 'configuration': {'sword_version': '2'}, - 'name': 'swh-deposit', - 'version': '0.0.1' - } } expected_revision = { - 'author': {'email': 'robot@softwareheritage.org', - 'fullname': 'Software Heritage', - 'name': 'Software Heritage'}, - 'committer': {'email': 'robot@softwareheritage.org', - 'fullname': 'Software Heritage', - 'name': 'Software Heritage'}, - 'committer_date': {'negative_utc': False, - 'offset': 0, - 'timestamp': {'microseconds': 0, - 'seconds': 1459900800}}, - 'date': { - 'negative_utc': False, - 'offset': 0, - 'timestamp': {'microseconds': 0, 'seconds': 1459900800}}, - 'message': '%s: Deposit %s in collection %s' % ( - deposit_collection.name, deposit.id, deposit_collection.name - ), - 'metadata': metadata, - 'synthetic': True, - 'type': 'tar' + "author": { + "email": "robot@softwareheritage.org", + "fullname": "Software Heritage", + "name": "Software Heritage", + }, + "committer": { + "email": "robot@softwareheritage.org", + "fullname": "Software Heritage", + "name": "Software Heritage", + }, + "committer_date": { + "negative_utc": False, + "offset": 0, + "timestamp": {"microseconds": 0, "seconds": 1459900800}, + }, + "date": { + "negative_utc": False, + "offset": 0, + "timestamp": {"microseconds": 0, "seconds": 1459900800}, + }, + "message": "%s: Deposit %s in collection %s" + % (deposit_collection.name, deposit.id, deposit_collection.name), + "metadata": metadata, + "synthetic": True, + "type": "tar", } expected_meta = { - 'branch_name': 'master', - 'origin': expected_origin, - 'origin_metadata': expected_origin_metadata, - 'revision': expected_revision, + "branch_name": "master", + "origin": expected_origin, + "origin_metadata": expected_origin_metadata, + "revision": expected_revision, } assert data == expected_meta def test_read_metadata_5( - authenticated_client, deposit_collection, partial_deposit, - atom_dataset): + authenticated_client, deposit_collection, partial_deposit, atom_dataset +): """dateCreated/datePublished provided, revision uses author/committer date If multiple dateCreated provided, the first occurrence (of dateCreated) is selected. If multiple datePublished provided, the first occurrence (of datePublished) is selected. """ deposit = partial_deposit # add metadata to the deposit with multiple datePublished/dateCreated - codemeta_entry_data = atom_dataset['metadata'] % """ + codemeta_entry_data = ( + atom_dataset["metadata"] + % """ 2015-04-06T17:08:47+02:00 2017-05-03T16:08:47+02:00 2016-04-06T17:08:47+02:00 2018-05-03T16:08:47+02:00 """ + ) deposit = update_deposit_with_metadata( - authenticated_client, deposit_collection, deposit, - codemeta_entry_data) + authenticated_client, deposit_collection, deposit, codemeta_entry_data + ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK - assert response._headers['content-type'][1] == 'application/json' + assert response._headers["content-type"][1] == "application/json" data = response.json() expected_origin = { - 'type': 'deposit', - 'url': 'https://hal-test.archives-ouvertes.fr/external-id-partial' + "type": "deposit", + "url": "https://hal-test.archives-ouvertes.fr/external-id-partial", } metadata = { - '@xmlns': 'http://www.w3.org/2005/Atom', - '@xmlns:codemeta': 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', - 'author': {'email': 'hal@ccsd.cnrs.fr', - 'name': 'HAL'}, - 'client': 'hal', - 'codemeta:applicationCategory': 'test', - 'codemeta:author': {'codemeta:name': 'Morane ' - 'Gruenpeter'}, - 'codemeta:dateCreated': ['2015-04-06T17:08:47+02:00', - '2016-04-06T17:08:47+02:00'], - 'codemeta:datePublished': ['2017-05-03T16:08:47+02:00', - '2018-05-03T16:08:47+02:00'], - 'codemeta:description': 'this is the description', - 'codemeta:developmentStatus': 'stable', - 'codemeta:keywords': 'DSP programming', - 'codemeta:license': [ + "@xmlns": "http://www.w3.org/2005/Atom", + "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0", + "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"}, + "client": "hal", + "codemeta:applicationCategory": "test", + "codemeta:author": {"codemeta:name": "Morane " "Gruenpeter"}, + "codemeta:dateCreated": [ + "2015-04-06T17:08:47+02:00", + "2016-04-06T17:08:47+02:00", + ], + "codemeta:datePublished": [ + "2017-05-03T16:08:47+02:00", + "2018-05-03T16:08:47+02:00", + ], + "codemeta:description": "this is the description", + "codemeta:developmentStatus": "stable", + "codemeta:keywords": "DSP programming", + "codemeta:license": [ { - 'codemeta:name': 'GNU ' - 'General ' - 'Public ' - 'License ' - 'v3.0 ' - 'only'}, + "codemeta:name": "GNU " + "General " + "Public " + "License " + "v3.0 " + "only" + }, { - 'codemeta:name': 'CeCILL ' - 'Free ' - 'Software ' - 'License ' - 'Agreement ' - 'v1.1' - } + "codemeta:name": "CeCILL " + "Free " + "Software " + "License " + "Agreement " + "v1.1" + }, ], - 'codemeta:programmingLanguage': ['php', - 'python', - 'C'], - 'codemeta:runtimePlatform': 'phpstorm', - 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa - 'codemeta:version': '1', - 'external_identifier': 'hal-01243065', - 'id': 'hal-01243065', - 'title': 'Composing a Web of Audio ' - 'Applications' + "codemeta:programmingLanguage": ["php", "python", "C"], + "codemeta:runtimePlatform": "phpstorm", + "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", # noqa + "codemeta:version": "1", + "external_identifier": "hal-01243065", + "id": "hal-01243065", + "title": "Composing a Web of Audio " "Applications", } expected_origin_metadata = { - 'metadata': metadata, - 'provider': { - 'metadata': {}, - 'provider_name': '', - 'provider_type': 'deposit_client', - 'provider_url': 'https://hal-test.archives-ouvertes.fr/'}, - 'tool': { - 'configuration': {'sword_version': '2'}, - 'name': 'swh-deposit', - 'version': '0.0.1' - } + "metadata": metadata, + "provider": { + "metadata": {}, + "provider_name": "", + "provider_type": "deposit_client", + "provider_url": "https://hal-test.archives-ouvertes.fr/", + }, + "tool": { + "configuration": {"sword_version": "2"}, + "name": "swh-deposit", + "version": "0.0.1", + }, } expected_revision = { - 'author': {'email': 'robot@softwareheritage.org', - 'fullname': 'Software Heritage', - 'name': 'Software Heritage'}, - 'committer': {'email': 'robot@softwareheritage.org', - 'fullname': 'Software Heritage', - 'name': 'Software Heritage'}, - 'committer_date': {'negative_utc': False, - 'offset': 120, - 'timestamp': {'microseconds': 0, - 'seconds': 1493820527}}, - 'date': {'negative_utc': False, - 'offset': 120, - 'timestamp': {'microseconds': 0, 'seconds': 1428332927}}, - 'message': '%s: Deposit %s in collection %s' % ( - deposit_collection.name, deposit.id, deposit_collection.name - ), - 'metadata': metadata, - 'synthetic': True, - 'type': 'tar' + "author": { + "email": "robot@softwareheritage.org", + "fullname": "Software Heritage", + "name": "Software Heritage", + }, + "committer": { + "email": "robot@softwareheritage.org", + "fullname": "Software Heritage", + "name": "Software Heritage", + }, + "committer_date": { + "negative_utc": False, + "offset": 120, + "timestamp": {"microseconds": 0, "seconds": 1493820527}, + }, + "date": { + "negative_utc": False, + "offset": 120, + "timestamp": {"microseconds": 0, "seconds": 1428332927}, + }, + "message": "%s: Deposit %s in collection %s" + % (deposit_collection.name, deposit.id, deposit_collection.name), + "metadata": metadata, + "synthetic": True, + "type": "tar", } expected_meta = { - 'branch_name': 'master', - 'origin': expected_origin, - 'origin_metadata': expected_origin_metadata, - 'revision': expected_revision + "branch_name": "master", + "origin": expected_origin, + "origin_metadata": expected_origin_metadata, + "revision": expected_revision, } assert data == expected_meta def test_access_to_nonexisting_deposit_returns_404_response( - authenticated_client, deposit_collection, ): + authenticated_client, deposit_collection, +): """Read unknown collection should return a 404 response """ unknown_id = 999 try: Deposit.objects.get(pk=unknown_id) except Deposit.DoesNotExist: assert True for url in private_get_raw_url_endpoints(deposit_collection, unknown_id): response = authenticated_client.get(url) assert response.status_code == status.HTTP_404_NOT_FOUND - msg = 'Deposit with id %s does not exist' % unknown_id - assert msg in response.content.decode('utf-8') + msg = "Deposit with id %s does not exist" % unknown_id + assert msg in response.content.decode("utf-8") diff --git a/swh/deposit/tests/api/test_deposit_private_update_status.py b/swh/deposit/tests/api/test_deposit_private_update_status.py index cc347f86..c9bc27a5 100644 --- a/swh/deposit/tests/api/test_deposit_private_update_status.py +++ b/swh/deposit/tests/api/test_deposit_private_update_status.py @@ -1,140 +1,154 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from django.urls import reverse from rest_framework import status from swh.deposit.models import Deposit, DEPOSIT_STATUS_DETAIL from swh.deposit.config import ( - PRIVATE_PUT_DEPOSIT, DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_LOAD_SUCCESS + PRIVATE_PUT_DEPOSIT, + DEPOSIT_STATUS_VERIFIED, + DEPOSIT_STATUS_LOAD_SUCCESS, ) -PRIVATE_PUT_DEPOSIT_NC = PRIVATE_PUT_DEPOSIT + '-nc' +PRIVATE_PUT_DEPOSIT_NC = PRIVATE_PUT_DEPOSIT + "-nc" def private_check_url_endpoints(collection, deposit): """There are 2 endpoints to check (one with collection, one without)""" return [ reverse(PRIVATE_PUT_DEPOSIT, args=[collection.name, deposit.id]), - reverse(PRIVATE_PUT_DEPOSIT_NC, args=[deposit.id]) + reverse(PRIVATE_PUT_DEPOSIT_NC, args=[deposit.id]), ] def test_update_deposit_status( - authenticated_client, deposit_collection, ready_deposit_verified): + authenticated_client, deposit_collection, ready_deposit_verified +): """Existing status for update should return a 204 response """ deposit = ready_deposit_verified for url in private_check_url_endpoints(deposit_collection, deposit): possible_status = set(DEPOSIT_STATUS_DETAIL.keys()) - set( - [DEPOSIT_STATUS_LOAD_SUCCESS]) + [DEPOSIT_STATUS_LOAD_SUCCESS] + ) for _status in possible_status: response = authenticated_client.put( url, - content_type='application/json', - data=json.dumps({'status': _status})) + content_type="application/json", + data=json.dumps({"status": _status}), + ) assert response.status_code == status.HTTP_204_NO_CONTENT deposit = Deposit.objects.get(pk=deposit.id) assert deposit.status == _status deposit.status = DEPOSIT_STATUS_VERIFIED deposit.save() # hack the same deposit def test_update_deposit_status_with_info( - authenticated_client, deposit_collection, ready_deposit_verified): + authenticated_client, deposit_collection, ready_deposit_verified +): """Existing status for update with info should return a 204 response """ deposit = ready_deposit_verified for url in private_check_url_endpoints(deposit_collection, deposit): expected_status = DEPOSIT_STATUS_LOAD_SUCCESS - origin_url = 'something' - directory_id = '42a13fc721c8716ff695d0d62fc851d641f3a12b' - revision_id = '47dc6b4636c7f6cba0df83e3d5490bf4334d987e' - expected_swh_id = 'swh:1:dir:%s' % directory_id - expected_swh_id_context = 'swh:1:dir:%s;origin=%s' % ( - directory_id, origin_url) - expected_swh_anchor_id = 'swh:1:rev:%s' % revision_id - expected_swh_anchor_id_context = 'swh:1:rev:%s;origin=%s' % ( - revision_id, origin_url) + origin_url = "something" + directory_id = "42a13fc721c8716ff695d0d62fc851d641f3a12b" + revision_id = "47dc6b4636c7f6cba0df83e3d5490bf4334d987e" + expected_swh_id = "swh:1:dir:%s" % directory_id + expected_swh_id_context = "swh:1:dir:%s;origin=%s" % (directory_id, origin_url) + expected_swh_anchor_id = "swh:1:rev:%s" % revision_id + expected_swh_anchor_id_context = "swh:1:rev:%s;origin=%s" % ( + revision_id, + origin_url, + ) response = authenticated_client.put( url, - content_type='application/json', - data=json.dumps({ - 'status': expected_status, - 'revision_id': revision_id, - 'directory_id': directory_id, - 'origin_url': origin_url, - })) + content_type="application/json", + data=json.dumps( + { + "status": expected_status, + "revision_id": revision_id, + "directory_id": directory_id, + "origin_url": origin_url, + } + ), + ) assert response.status_code == status.HTTP_204_NO_CONTENT deposit = Deposit.objects.get(pk=deposit.id) assert deposit.status == expected_status assert deposit.swh_id == expected_swh_id assert deposit.swh_id_context == expected_swh_id_context assert deposit.swh_anchor_id == expected_swh_anchor_id assert deposit.swh_anchor_id_context == expected_swh_anchor_id_context deposit.swh_id = None deposit.swh_id_context = None deposit.swh_anchor_id = None deposit.swh_anchor_id_context = None deposit.status = DEPOSIT_STATUS_VERIFIED deposit.save() def test_update_deposit_status_will_fail_with_unknown_status( - authenticated_client, deposit_collection, ready_deposit_verified): + authenticated_client, deposit_collection, ready_deposit_verified +): """Unknown status for update should return a 400 response """ deposit = ready_deposit_verified for url in private_check_url_endpoints(deposit_collection, deposit): response = authenticated_client.put( - url, - content_type='application/json', - data=json.dumps({'status': 'unknown'})) + url, content_type="application/json", data=json.dumps({"status": "unknown"}) + ) assert response.status_code == status.HTTP_400_BAD_REQUEST def test_update_deposit_status_will_fail_with_no_status_key( - authenticated_client, deposit_collection, ready_deposit_verified): + authenticated_client, deposit_collection, ready_deposit_verified +): """No status provided for update should return a 400 response """ deposit = ready_deposit_verified for url in private_check_url_endpoints(deposit_collection, deposit): response = authenticated_client.put( url, - content_type='application/json', - data=json.dumps({'something': 'something'})) + content_type="application/json", + data=json.dumps({"something": "something"}), + ) assert response.status_code == status.HTTP_400_BAD_REQUEST def test_update_deposit_status_success_without_swh_id_fail( - authenticated_client, deposit_collection, ready_deposit_verified): + authenticated_client, deposit_collection, ready_deposit_verified +): """Providing successful status without swh_id should return a 400 """ deposit = ready_deposit_verified for url in private_check_url_endpoints(deposit_collection, deposit): response = authenticated_client.put( url, - content_type='application/json', - data=json.dumps({'status': DEPOSIT_STATUS_LOAD_SUCCESS})) + content_type="application/json", + data=json.dumps({"status": DEPOSIT_STATUS_LOAD_SUCCESS}), + ) assert response.status_code == status.HTTP_400_BAD_REQUEST diff --git a/swh/deposit/tests/api/test_deposit_status.py b/swh/deposit/tests/api/test_deposit_status.py index e2f8ae36..28d212ae 100644 --- a/swh/deposit/tests/api/test_deposit_status.py +++ b/swh/deposit/tests/api/test_deposit_status.py @@ -1,130 +1,124 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import reverse from io import BytesIO from rest_framework import status -from swh.deposit.config import (STATE_IRI, DEPOSIT_STATUS_DEPOSITED, - DEPOSIT_STATUS_REJECTED) -from swh.deposit.models import ( - DEPOSIT_STATUS_DETAIL, DEPOSIT_STATUS_LOAD_SUCCESS +from swh.deposit.config import ( + STATE_IRI, + DEPOSIT_STATUS_DEPOSITED, + DEPOSIT_STATUS_REJECTED, ) +from swh.deposit.models import DEPOSIT_STATUS_DETAIL, DEPOSIT_STATUS_LOAD_SUCCESS from swh.deposit.parsers import parse_xml -def test_post_deposit_with_status_check( - authenticated_client, deposited_deposit): +def test_post_deposit_with_status_check(authenticated_client, deposited_deposit): """Successful but not loaded deposit should have a status 'deposited' """ deposit = deposited_deposit - status_url = reverse(STATE_IRI, - args=[deposit.collection.name, deposit.id]) + status_url = reverse(STATE_IRI, args=[deposit.collection.name, deposit.id]) # check status status_response = authenticated_client.get(status_url) assert status_response.status_code == status.HTTP_200_OK r = parse_xml(BytesIO(status_response.content)) - assert int(r['deposit_id']) == deposit.id - assert r['deposit_status'] == DEPOSIT_STATUS_DEPOSITED - assert r['deposit_status_detail'] == \ - DEPOSIT_STATUS_DETAIL[DEPOSIT_STATUS_DEPOSITED] - assert r['deposit_external_id'] == deposit.external_id + assert int(r["deposit_id"]) == deposit.id + assert r["deposit_status"] == DEPOSIT_STATUS_DEPOSITED + assert r["deposit_status_detail"] == DEPOSIT_STATUS_DETAIL[DEPOSIT_STATUS_DEPOSITED] + assert r["deposit_external_id"] == deposit.external_id def test_status_unknown_deposit(authenticated_client, deposit_collection): """Unknown deposit status should return 404 response """ unknown_deposit_id = 999 - status_url = reverse(STATE_IRI, - args=[deposit_collection.name, unknown_deposit_id]) + status_url = reverse(STATE_IRI, args=[deposit_collection.name, unknown_deposit_id]) status_response = authenticated_client.get(status_url) assert status_response.status_code == status.HTTP_404_NOT_FOUND -def test_status_unknown_collection( - authenticated_client, deposited_deposit): +def test_status_unknown_collection(authenticated_client, deposited_deposit): """Unknown collection status should return 404 response""" deposit = deposited_deposit - unknown_collection = 'something-unknown' - status_url = reverse(STATE_IRI, - args=[unknown_collection, deposit.id]) + unknown_collection = "something-unknown" + status_url = reverse(STATE_IRI, args=[unknown_collection, deposit.id]) status_response = authenticated_client.get(status_url) assert status_response.status_code == status.HTTP_404_NOT_FOUND def test_status_deposit_rejected(authenticated_client, rejected_deposit): """Rejected deposit status should be 'rejected' with detailed summary """ deposit = rejected_deposit # _status_detail = {'url': {'summary': 'Wrong url'}} - url = reverse(STATE_IRI, - args=[deposit.collection.name, deposit.id]) + url = reverse(STATE_IRI, args=[deposit.collection.name, deposit.id]) # when status_response = authenticated_client.get(url) # then assert status_response.status_code == status.HTTP_200_OK r = parse_xml(BytesIO(status_response.content)) - assert int(r['deposit_id']) == deposit.id - assert r['deposit_status'] == DEPOSIT_STATUS_REJECTED - assert r['deposit_status_detail'] == 'Deposit failed the checks' + assert int(r["deposit_id"]) == deposit.id + assert r["deposit_status"] == DEPOSIT_STATUS_REJECTED + assert r["deposit_status_detail"] == "Deposit failed the checks" if deposit.swh_id: - assert r['deposit_swh_id'] == deposit.swh_id + assert r["deposit_swh_id"] == deposit.swh_id def test_status_with_http_accept_header_should_not_break( - authenticated_client, partial_deposit): + authenticated_client, partial_deposit +): """Asking deposit status with Accept header should return 200 """ deposit = partial_deposit - status_url = reverse(STATE_IRI, args=[ - deposit.collection.name, deposit.id]) + status_url = reverse(STATE_IRI, args=[deposit.collection.name, deposit.id]) response = authenticated_client.get(status_url) assert response.status_code == status.HTTP_200_OK response = authenticated_client.get( - status_url, - HTTP_ACCEPT='text/html,application/xml;q=9,*/*,q=8') + status_url, HTTP_ACCEPT="text/html,application/xml;q=9,*/*,q=8" + ) assert response.status_code == status.HTTP_200_OK -def test_status_complete_deposit( - authenticated_client, complete_deposit): +def test_status_complete_deposit(authenticated_client, complete_deposit): """Successful and loaded deposit should be 'done' and have detailed swh ids """ deposit = complete_deposit url = reverse(STATE_IRI, args=[deposit.collection.name, deposit.id]) # when status_response = authenticated_client.get(url) # then assert status_response.status_code == status.HTTP_200_OK r = parse_xml(BytesIO(status_response.content)) - assert int(r['deposit_id']) == deposit.id - assert r['deposit_status'] == DEPOSIT_STATUS_LOAD_SUCCESS - assert r['deposit_status_detail'] == \ - DEPOSIT_STATUS_DETAIL[DEPOSIT_STATUS_LOAD_SUCCESS] + assert int(r["deposit_id"]) == deposit.id + assert r["deposit_status"] == DEPOSIT_STATUS_LOAD_SUCCESS + assert ( + r["deposit_status_detail"] == DEPOSIT_STATUS_DETAIL[DEPOSIT_STATUS_LOAD_SUCCESS] + ) assert deposit.swh_id is not None - assert r['deposit_swh_id'] == deposit.swh_id + assert r["deposit_swh_id"] == deposit.swh_id assert deposit.swh_id_context is not None - assert r['deposit_swh_id_context'] == deposit.swh_id_context + assert r["deposit_swh_id_context"] == deposit.swh_id_context assert deposit.swh_anchor_id is not None - assert r['deposit_swh_anchor_id'] == deposit.swh_anchor_id + assert r["deposit_swh_anchor_id"] == deposit.swh_anchor_id assert deposit.swh_anchor_id_context is not None - assert r['deposit_swh_anchor_id_context'] == deposit.swh_anchor_id_context + assert r["deposit_swh_anchor_id_context"] == deposit.swh_anchor_id_context diff --git a/swh/deposit/tests/api/test_deposit_update.py b/swh/deposit/tests/api/test_deposit_update.py index a09c30fc..43b268cd 100644 --- a/swh/deposit/tests/api/test_deposit_update.py +++ b/swh/deposit/tests/api/test_deposit_update.py @@ -1,383 +1,395 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import reverse from rest_framework import status from swh.deposit.models import Deposit, DepositRequest, DepositCollection from swh.deposit.config import EDIT_SE_IRI, EM_IRI from swh.deposit.parsers import parse_xml from swh.deposit.tests.common import create_arborescence_archive, check_archive def test_replace_archive_to_deposit_is_possible( - tmp_path, partial_deposit, deposit_collection, authenticated_client, - sample_archive, atom_dataset): + tmp_path, + partial_deposit, + deposit_collection, + authenticated_client, + sample_archive, + atom_dataset, +): """Replace all archive with another one should return a 204 response """ tmp_path = str(tmp_path) # given deposit = partial_deposit - requests = DepositRequest.objects.filter( - deposit=deposit, - type='archive') + requests = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(list(requests)) == 1 - check_archive(sample_archive['name'], requests[0].archive.name) + check_archive(sample_archive["name"], requests[0].archive.name) # we have no metadata for that deposit - requests = list(DepositRequest.objects.filter( - deposit=deposit, type='metadata')) + requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) assert len(requests) == 0 response = authenticated_client.post( reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]), - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data1'], + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data1"], HTTP_SLUG=deposit.external_id, - HTTP_IN_PROGRESS=True) + HTTP_IN_PROGRESS=True, + ) - requests = list(DepositRequest.objects.filter( - deposit=deposit, type='metadata')) + requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) assert len(requests) == 1 update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) - external_id = 'some-external-id-1' + external_id = "some-external-id-1" archive2 = create_arborescence_archive( - tmp_path, 'archive2', 'file2', b'some other content in file') + tmp_path, "archive2", "file2", b"some other content in file" + ) response = authenticated_client.put( update_uri, - content_type='application/zip', # as zip - data=archive2['data'], + content_type="application/zip", # as zip + data=archive2["data"], # + headers - CONTENT_LENGTH=archive2['length'], + CONTENT_LENGTH=archive2["length"], HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=archive2['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false', - HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( - archive2['name'], )) + HTTP_CONTENT_MD5=archive2["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],), + ) assert response.status_code == status.HTTP_204_NO_CONTENT - requests = DepositRequest.objects.filter( - deposit=deposit, - type='archive') + requests = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(list(requests)) == 1 - check_archive(archive2['name'], requests[0].archive.name) + check_archive(archive2["name"], requests[0].archive.name) # check we did not touch the other parts - requests = list(DepositRequest.objects.filter( - deposit=deposit, type='metadata')) + requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) assert len(requests) == 1 def test_replace_metadata_to_deposit_is_possible( - tmp_path, authenticated_client, partial_deposit_with_metadata, - deposit_collection, atom_dataset): + tmp_path, + authenticated_client, + partial_deposit_with_metadata, + deposit_collection, + atom_dataset, +): """Replace all metadata with another one should return a 204 response """ # given deposit = partial_deposit_with_metadata - raw_metadata0 = atom_dataset['entry-data0'] % deposit.external_id.encode( - 'utf-8') + raw_metadata0 = atom_dataset["entry-data0"] % deposit.external_id.encode("utf-8") - requests_meta = DepositRequest.objects.filter( - deposit=deposit, - type='metadata') + requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests_meta) == 1 request_meta0 = requests_meta[0] assert request_meta0.raw_metadata == raw_metadata0 - requests_archive0 = DepositRequest.objects.filter( - deposit=deposit, type='archive') + requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive0) == 1 - update_uri = reverse(EDIT_SE_IRI, args=[ - deposit_collection.name, deposit.id]) + update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.put( update_uri, - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data1']) + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data1"], + ) assert response.status_code == status.HTTP_204_NO_CONTENT - requests_meta = DepositRequest.objects.filter( - deposit=deposit, - type='metadata') + requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests_meta) == 1 request_meta1 = requests_meta[0] raw_metadata1 = request_meta1.raw_metadata - assert raw_metadata1 == atom_dataset['entry-data1'] + assert raw_metadata1 == atom_dataset["entry-data1"] assert raw_metadata0 != raw_metadata1 assert request_meta0 != request_meta1 # check we did not touch the other parts - requests_archive1 = DepositRequest.objects.filter( - deposit=deposit, type='archive') + requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive1) == 1 assert set(requests_archive0) == set(requests_archive1) def test_add_archive_to_deposit_is_possible( - tmp_path, authenticated_client, deposit_collection, - partial_deposit_with_metadata, sample_archive): + tmp_path, + authenticated_client, + deposit_collection, + partial_deposit_with_metadata, + sample_archive, +): """Add another archive to a deposit return a 201 response """ tmp_path = str(tmp_path) deposit = partial_deposit_with_metadata - requests = DepositRequest.objects.filter( - deposit=deposit, - type='archive') + requests = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests) == 1 - check_archive(sample_archive['name'], requests[0].archive.name) + check_archive(sample_archive["name"], requests[0].archive.name) - requests_meta0 = DepositRequest.objects.filter( - deposit=deposit, type='metadata') + requests_meta0 = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests_meta0) == 1 update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) - external_id = 'some-external-id-1' + external_id = "some-external-id-1" archive2 = create_arborescence_archive( - tmp_path, 'archive2', 'file2', b'some other content in file') + tmp_path, "archive2", "file2", b"some other content in file" + ) response = authenticated_client.post( update_uri, - content_type='application/zip', # as zip - data=archive2['data'], + content_type="application/zip", # as zip + data=archive2["data"], # + headers - CONTENT_LENGTH=archive2['length'], + CONTENT_LENGTH=archive2["length"], HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=archive2['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false', - HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( - archive2['name'],)) + HTTP_CONTENT_MD5=archive2["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],), + ) assert response.status_code == status.HTTP_201_CREATED - requests = DepositRequest.objects.filter( - deposit=deposit, - type='archive').order_by('id') + requests = DepositRequest.objects.filter(deposit=deposit, type="archive").order_by( + "id" + ) assert len(requests) == 2 # first archive still exists - check_archive(sample_archive['name'], requests[0].archive.name) + check_archive(sample_archive["name"], requests[0].archive.name) # a new one was added - check_archive(archive2['name'], requests[1].archive.name) + check_archive(archive2["name"], requests[1].archive.name) # check we did not touch the other parts - requests_meta1 = DepositRequest.objects.filter( - deposit=deposit, type='metadata') + requests_meta1 = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests_meta1) == 1 assert set(requests_meta0) == set(requests_meta1) def test_add_metadata_to_deposit_is_possible( - authenticated_client, deposit_collection, - partial_deposit_with_metadata, atom_dataset): + authenticated_client, + deposit_collection, + partial_deposit_with_metadata, + atom_dataset, +): """Add metadata with another one should return a 204 response """ deposit = partial_deposit_with_metadata - requests = DepositRequest.objects.filter( - deposit=deposit, - type='metadata') + requests = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests) == 1 - requests_archive0 = DepositRequest.objects.filter( - deposit=deposit, type='archive') + requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive0) == 1 - update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, - deposit.id]) + update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) - atom_entry = atom_dataset['entry-data1'] + atom_entry = atom_dataset["entry-data1"] response = authenticated_client.post( - update_uri, - content_type='application/atom+xml;type=entry', - data=atom_entry) + update_uri, content_type="application/atom+xml;type=entry", data=atom_entry + ) assert response.status_code == status.HTTP_201_CREATED - requests = DepositRequest.objects.filter( - deposit=deposit, - type='metadata').order_by('id') + requests = DepositRequest.objects.filter(deposit=deposit, type="metadata").order_by( + "id" + ) assert len(requests) == 2 - expected_raw_meta0 = atom_dataset['entry-data0'] % ( - deposit.external_id.encode('utf-8')) + expected_raw_meta0 = atom_dataset["entry-data0"] % ( + deposit.external_id.encode("utf-8") + ) # a new one was added assert requests[0].raw_metadata == expected_raw_meta0 assert requests[1].raw_metadata == atom_entry # check we did not touch the other parts - requests_archive1 = DepositRequest.objects.filter( - deposit=deposit, type='archive') + requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive1) == 1 assert set(requests_archive0) == set(requests_archive1) def test_add_metadata_to_unknown_deposit( - deposit_collection, authenticated_client, atom_dataset): + deposit_collection, authenticated_client, atom_dataset +): """Replacing metadata to unknown deposit should return a 404 response """ unknown_deposit_id = 1000 try: Deposit.objects.get(pk=unknown_deposit_id) except Deposit.DoesNotExist: assert True url = reverse(EDIT_SE_IRI, args=[deposit_collection, unknown_deposit_id]) response = authenticated_client.post( url, - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data1']) + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data1"], + ) assert response.status_code == status.HTTP_404_NOT_FOUND response_content = parse_xml(response.content) - assert 'Unknown collection name' in \ - response_content['sword:error']['summary'] + assert "Unknown collection name" in response_content["sword:error"]["summary"] def test_add_metadata_to_unknown_collection( - partial_deposit, authenticated_client, atom_dataset): + partial_deposit, authenticated_client, atom_dataset +): """Replacing metadata to unknown deposit should return a 404 response """ deposit = partial_deposit - unknown_collection_name = 'unknown-collection' + unknown_collection_name = "unknown-collection" try: DepositCollection.objects.get(name=unknown_collection_name) except DepositCollection.DoesNotExist: assert True url = reverse(EDIT_SE_IRI, args=[unknown_collection_name, deposit.id]) response = authenticated_client.post( url, - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data1']) + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data1"], + ) assert response.status_code == status.HTTP_404_NOT_FOUND response_content = parse_xml(response.content) - assert 'Unknown collection name' in \ - response_content['sword:error']['summary'] + assert "Unknown collection name" in response_content["sword:error"]["summary"] def test_replace_metadata_to_unknown_deposit( - authenticated_client, deposit_collection, atom_dataset): + authenticated_client, deposit_collection, atom_dataset +): """Adding metadata to unknown deposit should return a 404 response """ unknown_deposit_id = 998 try: Deposit.objects.get(pk=unknown_deposit_id) except Deposit.DoesNotExist: assert True - url = reverse(EDIT_SE_IRI, args=[ - deposit_collection.name, unknown_deposit_id]) + url = reverse(EDIT_SE_IRI, args=[deposit_collection.name, unknown_deposit_id]) response = authenticated_client.put( url, - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data1']) + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data1"], + ) assert response.status_code == status.HTTP_404_NOT_FOUND response_content = parse_xml(response.content) - assert 'Deposit with id %s does not exist' % unknown_deposit_id == \ - response_content['sword:error']['summary'] + assert ( + "Deposit with id %s does not exist" % unknown_deposit_id + == response_content["sword:error"]["summary"] + ) def test_add_archive_to_unknown_deposit( - authenticated_client, deposit_collection, atom_dataset): + authenticated_client, deposit_collection, atom_dataset +): """Adding metadata to unknown deposit should return a 404 response """ unknown_deposit_id = 997 try: Deposit.objects.get(pk=unknown_deposit_id) except Deposit.DoesNotExist: assert True url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id]) - response = authenticated_client.post(url, - content_type='application/zip', - data=atom_dataset['entry-data1']) + response = authenticated_client.post( + url, content_type="application/zip", data=atom_dataset["entry-data1"] + ) assert response.status_code == status.HTTP_404_NOT_FOUND response_content = parse_xml(response.content) - assert 'Deposit with id %s does not exist' % unknown_deposit_id == \ - response_content['sword:error']['summary'] + assert ( + "Deposit with id %s does not exist" % unknown_deposit_id + == response_content["sword:error"]["summary"] + ) def test_replace_archive_to_unknown_deposit( - authenticated_client, deposit_collection, atom_dataset): + authenticated_client, deposit_collection, atom_dataset +): """Replacing archive to unknown deposit should return a 404 response """ unknown_deposit_id = 996 try: Deposit.objects.get(pk=unknown_deposit_id) except Deposit.DoesNotExist: assert True url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id]) response = authenticated_client.put( - url, - content_type='application/zip', - data=atom_dataset['entry-data1']) + url, content_type="application/zip", data=atom_dataset["entry-data1"] + ) assert response.status_code == status.HTTP_404_NOT_FOUND response_content = parse_xml(response.content) - assert 'Deposit with id %s does not exist' % unknown_deposit_id == \ - response_content['sword:error']['summary'] + assert ( + "Deposit with id %s does not exist" % unknown_deposit_id + == response_content["sword:error"]["summary"] + ) def test_post_metadata_to_em_iri_failure( - authenticated_client, deposit_collection, partial_deposit, - atom_dataset): + authenticated_client, deposit_collection, partial_deposit, atom_dataset +): """Update (POST) archive with wrong content type should return 400 """ deposit = partial_deposit update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.post( update_uri, - content_type='application/x-gtar-compressed', - data=atom_dataset['entry-data1']) + content_type="application/x-gtar-compressed", + data=atom_dataset["entry-data1"], + ) assert response.status_code == status.HTTP_400_BAD_REQUEST response_content = parse_xml(response.content) - msg = 'Packaging format supported is restricted to ' + \ - 'application/zip, application/x-tar' - assert msg == response_content['sword:error']['summary'] + msg = ( + "Packaging format supported is restricted to " + + "application/zip, application/x-tar" + ) + assert msg == response_content["sword:error"]["summary"] def test_put_metadata_to_em_iri_failure( - authenticated_client, deposit_collection, partial_deposit, - atom_dataset): + authenticated_client, deposit_collection, partial_deposit, atom_dataset +): """Update (PUT) archive with wrong content type should return 400 """ # given deposit = partial_deposit # when update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.put( update_uri, - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data1']) + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data1"], + ) # then assert response.status_code == status.HTTP_400_BAD_REQUEST response_content = parse_xml(response.content) - msg = 'Packaging format supported is restricted to ' + \ - 'application/zip, application/x-tar' - assert msg == response_content['sword:error']['summary'] + msg = ( + "Packaging format supported is restricted to " + + "application/zip, application/x-tar" + ) + assert msg == response_content["sword:error"]["summary"] diff --git a/swh/deposit/tests/api/test_parser.py b/swh/deposit/tests/api/test_parser.py index 7d7c3b1a..b1cc9119 100644 --- a/swh/deposit/tests/api/test_parser.py +++ b/swh/deposit/tests/api/test_parser.py @@ -1,97 +1,134 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import io from collections import OrderedDict from swh.deposit.parsers import SWHXMLParser def test_parsing_without_duplicates(): - xml_no_duplicate = io.BytesIO(b''' + xml_no_duplicate = io.BytesIO( + b""" Awesome Compiler GPL3.0 https://opensource.org/licenses/GPL-3.0 Python3 author1 Inria ocaml http://issuetracker.com -''') +""" + ) actual_result = SWHXMLParser().parse(xml_no_duplicate) expected_dict = OrderedDict( - [('@xmlns', 'http://www.w3.org/2005/Atom'), - ('@xmlns:codemeta', - 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0'), - ('title', 'Awesome Compiler'), - ('codemeta:license', - OrderedDict([('codemeta:name', 'GPL3.0'), - ('codemeta:url', - 'https://opensource.org/licenses/GPL-3.0')])), - ('codemeta:runtimePlatform', 'Python3'), - ('codemeta:author', - OrderedDict([('codemeta:name', 'author1'), - ('codemeta:affiliation', 'Inria')])), - ('codemeta:programmingLanguage', 'ocaml'), - ('codemeta:issueTracker', 'http://issuetracker.com')]) + [ + ("@xmlns", "http://www.w3.org/2005/Atom"), + ("@xmlns:codemeta", "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"), + ("title", "Awesome Compiler"), + ( + "codemeta:license", + OrderedDict( + [ + ("codemeta:name", "GPL3.0"), + ("codemeta:url", "https://opensource.org/licenses/GPL-3.0"), + ] + ), + ), + ("codemeta:runtimePlatform", "Python3"), + ( + "codemeta:author", + OrderedDict( + [("codemeta:name", "author1"), ("codemeta:affiliation", "Inria")] + ), + ), + ("codemeta:programmingLanguage", "ocaml"), + ("codemeta:issueTracker", "http://issuetracker.com"), + ] + ) assert expected_dict == actual_result def test_parsing_with_duplicates(): - xml_with_duplicates = io.BytesIO(b''' + xml_with_duplicates = io.BytesIO( + b""" Another Compiler GNU/Linux GPL3.0 https://opensource.org/licenses/GPL-3.0 Un*x author1 Inria author2 Inria ocaml haskell spdx http://spdx.org python3 -''') +""" + ) actual_result = SWHXMLParser().parse(xml_with_duplicates) - expected_dict = OrderedDict([ - ('@xmlns', 'http://www.w3.org/2005/Atom'), - ('@xmlns:codemeta', 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0'), - ('title', 'Another Compiler'), - ('codemeta:runtimePlatform', ['GNU/Linux', 'Un*x']), - ('codemeta:license', - [OrderedDict([('codemeta:name', 'GPL3.0'), - ('codemeta:url', - 'https://opensource.org/licenses/GPL-3.0')]), - OrderedDict([('codemeta:name', 'spdx'), - ('codemeta:url', 'http://spdx.org')])]), - ('codemeta:author', - [OrderedDict([('codemeta:name', 'author1'), - ('codemeta:affiliation', 'Inria')]), - OrderedDict([('codemeta:name', 'author2'), - ('codemeta:affiliation', 'Inria')])]), - ('codemeta:programmingLanguage', ['ocaml', 'haskell', 'python3'])]) + expected_dict = OrderedDict( + [ + ("@xmlns", "http://www.w3.org/2005/Atom"), + ("@xmlns:codemeta", "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"), + ("title", "Another Compiler"), + ("codemeta:runtimePlatform", ["GNU/Linux", "Un*x"]), + ( + "codemeta:license", + [ + OrderedDict( + [ + ("codemeta:name", "GPL3.0"), + ("codemeta:url", "https://opensource.org/licenses/GPL-3.0"), + ] + ), + OrderedDict( + [("codemeta:name", "spdx"), ("codemeta:url", "http://spdx.org")] + ), + ], + ), + ( + "codemeta:author", + [ + OrderedDict( + [ + ("codemeta:name", "author1"), + ("codemeta:affiliation", "Inria"), + ] + ), + OrderedDict( + [ + ("codemeta:name", "author2"), + ("codemeta:affiliation", "Inria"), + ] + ), + ], + ), + ("codemeta:programmingLanguage", ["ocaml", "haskell", "python3"]), + ] + ) assert expected_dict == actual_result diff --git a/swh/deposit/tests/api/test_service_document.py b/swh/deposit/tests/api/test_service_document.py index dda59a88..d1d24fa8 100644 --- a/swh/deposit/tests/api/test_service_document.py +++ b/swh/deposit/tests/api/test_service_document.py @@ -1,86 +1,82 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import reverse from rest_framework import status from swh.deposit.config import SD_IRI def test_service_document_no_auth_fails(client): """Without authentication, service document endpoint should return 401 """ url = reverse(SD_IRI) response = client.get(url) assert response.status_code == status.HTTP_401_UNAUTHORIZED def test_service_document_no_auth_with_http_auth_should_not_break(client): """Without auth, sd endpoint through browser should return 401 """ url = reverse(SD_IRI) - response = client.get( - url, - HTTP_ACCEPT='text/html,application/xml;q=9,*/*,q=8') + response = client.get(url, HTTP_ACCEPT="text/html,application/xml;q=9,*/*,q=8") assert response.status_code == status.HTTP_401_UNAUTHORIZED def test_service_document(authenticated_client, deposit_user): """With authentication, service document list user's collection """ url = reverse(SD_IRI) response = authenticated_client.get(url) check_response(response, deposit_user.username) -def test_service_document_with_http_accept_header( - authenticated_client, deposit_user): +def test_service_document_with_http_accept_header(authenticated_client, deposit_user): """With authentication, with browser, sd list user's collection """ url = reverse(SD_IRI) response = authenticated_client.get( - url, - HTTP_ACCEPT='text/html,application/xml;q=9,*/*,q=8') + url, HTTP_ACCEPT="text/html,application/xml;q=9,*/*,q=8" + ) check_response(response, deposit_user.username) def check_response(response, username): assert response.status_code == status.HTTP_200_OK - assert response.content.decode('utf-8') == \ - ''' + assert ( + response.content.decode("utf-8") + == """ 2.0 %s The Software Heritage (SWH) Archive %s Software Collection application/zip application/x-tar Collection Policy Software Heritage Archive Collect, Preserve, Share false false http://purl.org/net/sword/package/SimpleZip http://testserver/1/%s/ %s -''' % (500, - username, - username, - username, - username) # noqa +""" + % (500, username, username, username, username) + ) # noqa diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py index 656ebf69..6110bedf 100644 --- a/swh/deposit/tests/cli/test_client.py +++ b/swh/deposit/tests/cli/test_client.py @@ -1,323 +1,394 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import contextlib import logging import os import re from unittest.mock import MagicMock from click.testing import CliRunner import pytest from swh.deposit.client import PublicApiDepositClient -from swh.deposit.cli.client import ( - generate_slug, _url, _client, _collection, InputError) +from swh.deposit.cli.client import generate_slug, _url, _client, _collection, InputError from swh.deposit.cli import deposit as cli from ..conftest import TEST_USER EXAMPLE_SERVICE_DOCUMENT = { - 'service': { - 'workspace': { - 'collection': { - 'sword:name': 'softcol', - } - } - } + "service": {"workspace": {"collection": {"sword:name": "softcol",}}} } @pytest.fixture def slug(): return generate_slug() @pytest.fixture def client_mock(mocker, slug): - mocker.patch('swh.deposit.cli.client.generate_slug', return_value=slug) + mocker.patch("swh.deposit.cli.client.generate_slug", return_value=slug) mock_client = MagicMock() - mocker.patch( - 'swh.deposit.cli.client._client', - return_value=mock_client) + mocker.patch("swh.deposit.cli.client._client", return_value=mock_client) mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT mock_client.deposit_create.return_value = '{"foo": "bar"}' return mock_client def test_url(): - assert _url('http://deposit') == 'http://deposit/1' - assert _url('https://other/1') == 'https://other/1' + assert _url("http://deposit") == "http://deposit/1" + assert _url("https://other/1") == "https://other/1" def test_client(): - client = _client('http://deposit', 'user', 'pass') + client = _client("http://deposit", "user", "pass") assert isinstance(client, PublicApiDepositClient) def test_collection_error(): mock_client = MagicMock() - mock_client.service_document.return_value = { - 'error': 'something went wrong' - } + mock_client.service_document.return_value = {"error": "something went wrong"} with pytest.raises(InputError) as e: _collection(mock_client) - assert 'Service document retrieval: something went wrong' == str(e.value) + assert "Service document retrieval: something went wrong" == str(e.value) def test_collection_ok(): mock_client = MagicMock() mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT collection_name = _collection(mock_client) - assert collection_name == 'softcol' + assert collection_name == "softcol" def test_single_minimal_deposit( - sample_archive, mocker, caplog, client_mock, slug, tmp_path): + sample_archive, mocker, caplog, client_mock, slug, tmp_path +): """ from: https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit """ # noqa - metadata_path = os.path.join(tmp_path, 'metadata.xml') - mocker.patch('swh.deposit.cli.client.tempfile.TemporaryDirectory', - return_value=contextlib.nullcontext(str(tmp_path))) + metadata_path = os.path.join(tmp_path, "metadata.xml") + mocker.patch( + "swh.deposit.cli.client.tempfile.TemporaryDirectory", + return_value=contextlib.nullcontext(str(tmp_path)), + ) runner = CliRunner() - result = runner.invoke(cli, [ - 'upload', - '--url', 'mock://deposit.swh/1', - '--username', TEST_USER['username'], - '--password', TEST_USER['password'], - '--name', 'test-project', - '--archive', sample_archive['path'], - '--author', 'Jane Doe', - ]) + result = runner.invoke( + cli, + [ + "upload", + "--url", + "mock://deposit.swh/1", + "--username", + TEST_USER["username"], + "--password", + TEST_USER["password"], + "--name", + "test-project", + "--archive", + sample_archive["path"], + "--author", + "Jane Doe", + ], + ) assert result.exit_code == 0, result.output - assert result.output == '' + assert result.output == "" assert caplog.record_tuples == [ - ('swh.deposit.cli.client', logging.INFO, '{"foo": "bar"}'), + ("swh.deposit.cli.client", logging.INFO, '{"foo": "bar"}'), ] client_mock.deposit_create.assert_called_once_with( - archive=sample_archive['path'], - collection='softcol', in_progress=False, metadata=metadata_path, - slug=slug) + archive=sample_archive["path"], + collection="softcol", + in_progress=False, + metadata=metadata_path, + slug=slug, + ) with open(metadata_path) as fd: - assert fd.read() == f'''\ + assert ( + fd.read() + == f"""\ \ttest-project \t{slug} \t \t\tJane Doe \t -''' +""" + ) def test_metadata_validation(sample_archive, mocker, caplog, tmp_path): """ from: https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit """ # noqa slug = generate_slug() - mocker.patch('swh.deposit.cli.client.generate_slug', return_value=slug) + mocker.patch("swh.deposit.cli.client.generate_slug", return_value=slug) mock_client = MagicMock() - mocker.patch( - 'swh.deposit.cli.client._client', - return_value=mock_client) + mocker.patch("swh.deposit.cli.client._client", return_value=mock_client) mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT mock_client.deposit_create.return_value = '{"foo": "bar"}' - metadata_path = os.path.join(tmp_path, 'metadata.xml') - mocker.patch('swh.deposit.cli.client.tempfile.TemporaryDirectory', - return_value=contextlib.nullcontext(str(tmp_path))) - with open(metadata_path, 'a'): + metadata_path = os.path.join(tmp_path, "metadata.xml") + mocker.patch( + "swh.deposit.cli.client.tempfile.TemporaryDirectory", + return_value=contextlib.nullcontext(str(tmp_path)), + ) + with open(metadata_path, "a"): pass # creates the file runner = CliRunner() # Test missing author - result = runner.invoke(cli, [ - 'upload', - '--url', 'mock://deposit.swh/1', - '--username', TEST_USER['username'], - '--password', TEST_USER['password'], - '--name', 'test-project', - '--archive', sample_archive['path'], - ]) + result = runner.invoke( + cli, + [ + "upload", + "--url", + "mock://deposit.swh/1", + "--username", + TEST_USER["username"], + "--password", + TEST_USER["password"], + "--name", + "test-project", + "--archive", + sample_archive["path"], + ], + ) assert result.exit_code == 1, result.output - assert result.output == '' + assert result.output == "" assert len(caplog.record_tuples) == 1 (_logger, level, message) = caplog.record_tuples[0] assert level == logging.ERROR - assert ' --author ' in message + assert " --author " in message # Clear mocking state caplog.clear() mock_client.reset_mock() # Test missing name - result = runner.invoke(cli, [ - 'upload', - '--url', 'mock://deposit.swh/1', - '--username', TEST_USER['username'], - '--password', TEST_USER['password'], - '--archive', sample_archive['path'], - '--author', 'Jane Doe', - ]) + result = runner.invoke( + cli, + [ + "upload", + "--url", + "mock://deposit.swh/1", + "--username", + TEST_USER["username"], + "--password", + TEST_USER["password"], + "--archive", + sample_archive["path"], + "--author", + "Jane Doe", + ], + ) assert result.exit_code == 1, result.output - assert result.output == '' + assert result.output == "" assert len(caplog.record_tuples) == 1 (_logger, level, message) = caplog.record_tuples[0] assert level == logging.ERROR - assert ' --name ' in message + assert " --name " in message # Clear mocking state caplog.clear() mock_client.reset_mock() # Test both --metadata and --author - result = runner.invoke(cli, [ - 'upload', - '--url', 'mock://deposit.swh/1', - '--username', TEST_USER['username'], - '--password', TEST_USER['password'], - '--archive', sample_archive['path'], - '--metadata', metadata_path, - '--author', 'Jane Doe', - ]) + result = runner.invoke( + cli, + [ + "upload", + "--url", + "mock://deposit.swh/1", + "--username", + TEST_USER["username"], + "--password", + TEST_USER["password"], + "--archive", + sample_archive["path"], + "--metadata", + metadata_path, + "--author", + "Jane Doe", + ], + ) assert result.exit_code == 1, result.output - assert result.output == '' + assert result.output == "" assert len(caplog.record_tuples) == 1 (_logger, level, message) = caplog.record_tuples[0] assert level == logging.ERROR - assert re.search('--metadata.*is incompatible with', message) + assert re.search("--metadata.*is incompatible with", message) # Clear mocking state caplog.clear() mock_client.reset_mock() def test_single_deposit_slug_generation( - sample_archive, mocker, caplog, tmp_path, client_mock): + sample_archive, mocker, caplog, tmp_path, client_mock +): """ from: https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit """ # noqa - slug = 'my-slug' - collection = 'my-collection' + slug = "my-slug" + collection = "my-collection" - metadata_path = os.path.join(tmp_path, 'metadata.xml') - mocker.patch('swh.deposit.cli.client.tempfile.TemporaryDirectory', - return_value=contextlib.nullcontext(str(tmp_path))) + metadata_path = os.path.join(tmp_path, "metadata.xml") + mocker.patch( + "swh.deposit.cli.client.tempfile.TemporaryDirectory", + return_value=contextlib.nullcontext(str(tmp_path)), + ) runner = CliRunner() - result = runner.invoke(cli, [ - 'upload', - '--url', 'mock://deposit.swh/1', - '--username', TEST_USER['username'], - '--password', TEST_USER['password'], - '--name', 'test-project', - '--archive', sample_archive['path'], - '--slug', slug, - '--collection', collection, - '--author', 'Jane Doe', - ]) + result = runner.invoke( + cli, + [ + "upload", + "--url", + "mock://deposit.swh/1", + "--username", + TEST_USER["username"], + "--password", + TEST_USER["password"], + "--name", + "test-project", + "--archive", + sample_archive["path"], + "--slug", + slug, + "--collection", + collection, + "--author", + "Jane Doe", + ], + ) assert result.exit_code == 0, result.output - assert result.output == '' + assert result.output == "" assert caplog.record_tuples == [ - ('swh.deposit.cli.client', logging.INFO, '{"foo": "bar"}'), + ("swh.deposit.cli.client", logging.INFO, '{"foo": "bar"}'), ] client_mock.deposit_create.assert_called_once_with( - archive=sample_archive['path'], - collection=collection, in_progress=False, metadata=metadata_path, - slug=slug) + archive=sample_archive["path"], + collection=collection, + in_progress=False, + metadata=metadata_path, + slug=slug, + ) with open(metadata_path) as fd: - assert fd.read() == '''\ + assert ( + fd.read() + == """\ \ttest-project \tmy-slug \t \t\tJane Doe \t -''' +""" + ) def test_multisteps_deposit( - sample_archive, atom_dataset, mocker, caplog, datadir, - client_mock, slug): + sample_archive, atom_dataset, mocker, caplog, datadir, client_mock, slug +): """ from: https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#multisteps-deposit """ # noqa slug = generate_slug() - mocker.patch('swh.deposit.cli.client.generate_slug', return_value=slug) + mocker.patch("swh.deposit.cli.client.generate_slug", return_value=slug) # https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#create-an-incomplete-deposit client_mock.deposit_create.return_value = '{"deposit_id": "42"}' runner = CliRunner() - result = runner.invoke(cli, [ - 'upload', - '--url', 'mock://deposit.swh/1', - '--username', TEST_USER['username'], - '--password', TEST_USER['password'], - '--archive', sample_archive['path'], - '--partial', - ]) + result = runner.invoke( + cli, + [ + "upload", + "--url", + "mock://deposit.swh/1", + "--username", + TEST_USER["username"], + "--password", + TEST_USER["password"], + "--archive", + sample_archive["path"], + "--partial", + ], + ) assert result.exit_code == 0, result.output - assert result.output == '' + assert result.output == "" assert caplog.record_tuples == [ - ('swh.deposit.cli.client', logging.INFO, '{"deposit_id": "42"}'), + ("swh.deposit.cli.client", logging.INFO, '{"deposit_id": "42"}'), ] client_mock.deposit_create.assert_called_once_with( - archive=sample_archive['path'], - collection='softcol', in_progress=True, metadata=None, - slug=slug) + archive=sample_archive["path"], + collection="softcol", + in_progress=True, + metadata=None, + slug=slug, + ) # Clear mocking state caplog.clear() client_mock.reset_mock() # https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#add-content-or-metadata-to-the-deposit - metadata_path = os.path.join( - datadir, 'atom', 'entry-data-deposit-binary.xml') - - result = runner.invoke(cli, [ - 'upload', - '--url', 'mock://deposit.swh/1', - '--username', TEST_USER['username'], - '--password', TEST_USER['password'], - '--metadata', metadata_path, - ]) + metadata_path = os.path.join(datadir, "atom", "entry-data-deposit-binary.xml") + + result = runner.invoke( + cli, + [ + "upload", + "--url", + "mock://deposit.swh/1", + "--username", + TEST_USER["username"], + "--password", + TEST_USER["password"], + "--metadata", + metadata_path, + ], + ) assert result.exit_code == 0, result.output - assert result.output == '' + assert result.output == "" assert caplog.record_tuples == [ - ('swh.deposit.cli.client', logging.INFO, '{"deposit_id": "42"}'), + ("swh.deposit.cli.client", logging.INFO, '{"deposit_id": "42"}'), ] client_mock.deposit_create.assert_called_once_with( archive=None, - collection='softcol', in_progress=False, metadata=metadata_path, - slug=slug) + collection="softcol", + in_progress=False, + metadata=metadata_path, + slug=slug, + ) # Clear mocking state caplog.clear() client_mock.reset_mock() diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py index e4341f59..1fd4845e 100644 --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -1,138 +1,141 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib import os import re import tarfile import tempfile from swh.core import tarball def compute_info(archive_path): """Given a path, compute information on path. """ - with open(archive_path, 'rb') as f: + with open(archive_path, "rb") as f: length = 0 sha1sum = hashlib.sha1() md5sum = hashlib.md5() - data = b'' + data = b"" for chunk in f: sha1sum.update(chunk) md5sum.update(chunk) length += len(chunk) data += chunk return { - 'dir': os.path.dirname(archive_path), - 'name': os.path.basename(archive_path), - 'path': archive_path, - 'length': length, - 'sha1sum': sha1sum.hexdigest(), - 'md5sum': md5sum.hexdigest(), - 'data': data + "dir": os.path.dirname(archive_path), + "name": os.path.basename(archive_path), + "path": archive_path, + "length": length, + "sha1sum": sha1sum.hexdigest(), + "md5sum": md5sum.hexdigest(), + "data": data, } def _compress(path, extension, dir_path): """Compress path according to extension """ - if extension == 'zip' or extension == 'tar': + if extension == "zip" or extension == "tar": return tarball.compress(path, extension, dir_path) - elif '.' in extension: - split_ext = extension.split('.') - if split_ext[0] != 'tar': + elif "." in extension: + split_ext = extension.split(".") + if split_ext[0] != "tar": raise ValueError( - 'Development error, only zip or tar archive supported, ' - '%s not supported' % extension) + "Development error, only zip or tar archive supported, " + "%s not supported" % extension + ) # deal with specific tar mode = split_ext[1] - supported_mode = ['xz', 'gz', 'bz2'] + supported_mode = ["xz", "gz", "bz2"] if mode not in supported_mode: raise ValueError( - 'Development error, only %s supported, %s not supported' % ( - supported_mode, mode)) + "Development error, only %s supported, %s not supported" + % (supported_mode, mode) + ) files = tarball._ls(dir_path) - with tarfile.open(path, 'w:%s' % mode) as t: + with tarfile.open(path, "w:%s" % mode) as t: for fpath, fname in files: t.add(fpath, arcname=fname, recursive=False) return path -def create_arborescence_archive(root_path, archive_name, filename, content, - up_to_size=None, extension='zip'): +def create_arborescence_archive( + root_path, archive_name, filename, content, up_to_size=None, extension="zip" +): """Build an archive named archive_name in the root_path. This archive contains one file named filename with the content content. Args: root_path (str): Location path of the archive to create archive_name (str): Archive's name (without extension) filename (str): Archive's content is only one filename content (bytes): Content of the filename up_to_size (int | None): Fill in the blanks size to oversize or complete an archive's size extension (str): Extension of the archive to write (default is zip) Returns: dict with the keys: - dir: the directory of that archive - path: full path to the archive - sha1sum: archive's sha1sum - length: archive's length """ os.makedirs(root_path, exist_ok=True) archive_path_dir = tempfile.mkdtemp(dir=root_path) dir_path = os.path.join(archive_path_dir, archive_name) os.mkdir(dir_path) filepath = os.path.join(dir_path, filename) _length = len(content) count = 0 batch_size = 128 - with open(filepath, 'wb') as f: + with open(filepath, "wb") as f: f.write(content) if up_to_size: # fill with blank content up to a given size count += _length while count < up_to_size: - f.write(b'0'*batch_size) + f.write(b"0" * batch_size) count += batch_size - _path = '%s.%s' % (dir_path, extension) + _path = "%s.%s" % (dir_path, extension) _path = _compress(_path, extension, dir_path) return compute_info(_path) def create_archive_with_archive(root_path, name, archive): """Create an archive holding another. """ invalid_archive_path = os.path.join(root_path, name) - with tarfile.open(invalid_archive_path, 'w:gz') as _archive: - _archive.add(archive['path'], arcname=archive['name']) + with tarfile.open(invalid_archive_path, "w:gz") as _archive: + _archive.add(archive["path"], arcname=archive["name"]) return compute_info(invalid_archive_path) def check_archive(archive_name: str, archive_name_to_check: str): """Helper function to ensure archive_name is present within the archive_name_to_check. Raises: AssertionError if archive_name is not present within archive_name_to_check """ - if '.' in archive_name: - filename, extension = archive_name.split('.') - pattern = re.compile('.*/%s.*\\.%s' % (filename, extension)) + if "." in archive_name: + filename, extension = archive_name.split(".") + pattern = re.compile(".*/%s.*\\.%s" % (filename, extension)) else: - pattern = re.compile('.*/%s' % archive_name) + pattern = re.compile(".*/%s" % archive_name) assert pattern.match(archive_name_to_check) is not None diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py index c410c96a..9bb733e9 100644 --- a/swh/deposit/tests/conftest.py +++ b/swh/deposit/tests/conftest.py @@ -1,390 +1,417 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import base64 import pytest import psycopg2 from django.urls import reverse from django.test.utils import setup_databases # type: ignore + # mypy is asked to ignore the import statement above because setup_databases # is not part of the d.t.utils.__all__ variable. from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT from rest_framework import status from rest_framework.test import APIClient from typing import Mapping from swh.scheduler import get_scheduler from swh.scheduler.tests.conftest import * # noqa from swh.deposit.config import setup_django_for from swh.deposit.parsers import parse_xml from swh.deposit.config import SWHDefaultConfig from swh.deposit.config import ( - COL_IRI, EDIT_SE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED, - DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS, - DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_LOAD_FAILURE + COL_IRI, + EDIT_SE_IRI, + DEPOSIT_STATUS_DEPOSITED, + DEPOSIT_STATUS_REJECTED, + DEPOSIT_STATUS_PARTIAL, + DEPOSIT_STATUS_LOAD_SUCCESS, + DEPOSIT_STATUS_VERIFIED, + DEPOSIT_STATUS_LOAD_FAILURE, ) from swh.deposit.tests.common import create_arborescence_archive TEST_USER = { - 'username': 'test', - 'password': 'password', - 'email': 'test@example.org', - 'provider_url': 'https://hal-test.archives-ouvertes.fr/', - 'domain': 'archives-ouvertes.fr/', - 'collection': { - 'name': 'test' - }, + "username": "test", + "password": "password", + "email": "test@example.org", + "provider_url": "https://hal-test.archives-ouvertes.fr/", + "domain": "archives-ouvertes.fr/", + "collection": {"name": "test"}, } TEST_CONFIG = { - 'max_upload_size': 500, - 'extraction_dir': '/tmp/swh-deposit/test/extraction-dir', - 'checks': False, - 'provider': { - 'provider_name': '', - 'provider_type': 'deposit_client', - 'provider_url': '', - 'metadata': { - } + "max_upload_size": 500, + "extraction_dir": "/tmp/swh-deposit/test/extraction-dir", + "checks": False, + "provider": { + "provider_name": "", + "provider_type": "deposit_client", + "provider_url": "", + "metadata": {}, }, - 'tool': { - 'name': 'swh-deposit', - 'version': '0.0.1', - 'configuration': { - 'sword_version': '2' - } + "tool": { + "name": "swh-deposit", + "version": "0.0.1", + "configuration": {"sword_version": "2"}, }, } def pytest_configure(): - setup_django_for('testing') + setup_django_for("testing") @pytest.fixture() def deposit_config(): return TEST_CONFIG @pytest.fixture(autouse=True) def deposit_autoconfig(monkeypatch, deposit_config, swh_scheduler_config): """Enforce config for deposit classes inherited from SWHDefaultConfig.""" + def mock_parse_config(*args, **kw): config = deposit_config.copy() - config['scheduler'] = { - 'cls': 'local', - 'args': swh_scheduler_config, + config["scheduler"] = { + "cls": "local", + "args": swh_scheduler_config, } return config - monkeypatch.setattr( - SWHDefaultConfig, "parse_config_file", - mock_parse_config) - scheduler = get_scheduler('local', swh_scheduler_config) + monkeypatch.setattr(SWHDefaultConfig, "parse_config_file", mock_parse_config) + + scheduler = get_scheduler("local", swh_scheduler_config) task_type = { - 'type': 'load-deposit', - 'backend_name': 'swh.loader.packages.deposit.tasks.LoadDeposit', - 'description': 'why does this have not-null constraint?'} + "type": "load-deposit", + "backend_name": "swh.loader.packages.deposit.tasks.LoadDeposit", + "description": "why does this have not-null constraint?", + } scheduler.create_task_type(task_type) -@pytest.fixture(scope='session') -def django_db_setup( - request, - django_db_blocker, - postgresql_proc): +@pytest.fixture(scope="session") +def django_db_setup(request, django_db_blocker, postgresql_proc): from django.conf import settings - settings.DATABASES['default'].update({ - ('ENGINE', 'django.db.backends.postgresql'), - ('NAME', 'tests'), - ('USER', postgresql_proc.user), # noqa - ('HOST', postgresql_proc.host), # noqa - ('PORT', postgresql_proc.port), # noqa - }) + + settings.DATABASES["default"].update( + { + ("ENGINE", "django.db.backends.postgresql"), + ("NAME", "tests"), + ("USER", postgresql_proc.user), # noqa + ("HOST", postgresql_proc.host), # noqa + ("PORT", postgresql_proc.port), # noqa + } + ) with django_db_blocker.unblock(): setup_databases( - verbosity=request.config.option.verbose, - interactive=False, - keepdb=False) + verbosity=request.config.option.verbose, interactive=False, keepdb=False + ) def execute_sql(sql): """Execute sql to postgres db""" - with psycopg2.connect(database='postgres') as conn: + with psycopg2.connect(database="postgres") as conn: conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) cur = conn.cursor() cur.execute(sql) -@pytest.fixture(autouse=True, scope='session') +@pytest.fixture(autouse=True, scope="session") def swh_proxy(): """Automatically inject this fixture in all tests to ensure no outside connection takes place. """ - os.environ['http_proxy'] = 'http://localhost:999' - os.environ['https_proxy'] = 'http://localhost:999' + os.environ["http_proxy"] = "http://localhost:999" + os.environ["https_proxy"] = "http://localhost:999" def create_deposit_collection(collection_name: str): """Create a deposit collection with name collection_name """ from swh.deposit.models import DepositCollection + try: - collection = DepositCollection._default_manager.get( - name=collection_name) + collection = DepositCollection._default_manager.get(name=collection_name) except DepositCollection.DoesNotExist: collection = DepositCollection(name=collection_name) collection.save() return collection -def deposit_collection_factory( - collection_name=TEST_USER['collection']['name']): +def deposit_collection_factory(collection_name=TEST_USER["collection"]["name"]): @pytest.fixture def _deposit_collection(db, collection_name=collection_name): return create_deposit_collection(collection_name) return _deposit_collection deposit_collection = deposit_collection_factory() -deposit_another_collection = deposit_collection_factory('another-collection') +deposit_another_collection = deposit_collection_factory("another-collection") @pytest.fixture def deposit_user(db, deposit_collection): """Create/Return the test_user "test" """ from swh.deposit.models import DepositClient + try: - user = DepositClient._default_manager.get( - username=TEST_USER['username']) + user = DepositClient._default_manager.get(username=TEST_USER["username"]) except DepositClient.DoesNotExist: user = DepositClient._default_manager.create_user( - username=TEST_USER['username'], - email=TEST_USER['email'], - password=TEST_USER['password'], - provider_url=TEST_USER['provider_url'], - domain=TEST_USER['domain'], + username=TEST_USER["username"], + email=TEST_USER["email"], + password=TEST_USER["password"], + provider_url=TEST_USER["provider_url"], + domain=TEST_USER["domain"], ) user.collections = [deposit_collection.id] user.save() return user @pytest.fixture def client(): """Override pytest-django one which does not work for djangorestframework. """ return APIClient() # <- drf's client @pytest.yield_fixture def authenticated_client(client, deposit_user): """Returned a logged client """ - _token = '%s:%s' % (deposit_user.username, TEST_USER['password']) - token = base64.b64encode(_token.encode('utf-8')) - authorization = 'Basic %s' % token.decode('utf-8') + _token = "%s:%s" % (deposit_user.username, TEST_USER["password"]) + token = base64.b64encode(_token.encode("utf-8")) + authorization = "Basic %s" % token.decode("utf-8") client.credentials(HTTP_AUTHORIZATION=authorization) yield client client.logout() @pytest.fixture def sample_archive(tmp_path): """Returns a sample archive """ tmp_path = str(tmp_path) # pytest version limitation in previous version archive = create_arborescence_archive( - tmp_path, 'archive1', 'file1', b'some content in file') + tmp_path, "archive1", "file1", b"some content in file" + ) return archive @pytest.fixture def atom_dataset(datadir) -> Mapping[str, str]: """Compute the paths to atom files. Returns: Dict of atom name per content (bytes) """ - atom_path = os.path.join(datadir, 'atom') + atom_path = os.path.join(datadir, "atom") data = {} for filename in os.listdir(atom_path): filepath = os.path.join(atom_path, filename) - with open(filepath, 'rb') as f: - raw_content = f.read().decode('utf-8') + with open(filepath, "rb") as f: + raw_content = f.read().decode("utf-8") # Keep the filename without extension - atom_name = filename.split('.')[0] + atom_name = filename.split(".")[0] data[atom_name] = raw_content return data def create_deposit( - authenticated_client, collection_name: str, sample_archive, - external_id: str, deposit_status=DEPOSIT_STATUS_DEPOSITED): + authenticated_client, + collection_name: str, + sample_archive, + external_id: str, + deposit_status=DEPOSIT_STATUS_DEPOSITED, +): """Create a skeleton shell deposit """ url = reverse(COL_IRI, args=[collection_name]) # when response = authenticated_client.post( url, - content_type='application/zip', # as zip - data=sample_archive['data'], + content_type="application/zip", # as zip + data=sample_archive["data"], # + headers - CONTENT_LENGTH=sample_archive['length'], + CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=sample_archive['md5sum'], - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_IN_PROGRESS='false', - HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( - sample_archive['name'])) + HTTP_CONTENT_MD5=sample_archive["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="false", + HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (sample_archive["name"]), + ) # then assert response.status_code == status.HTTP_201_CREATED from swh.deposit.models import Deposit + deposit = Deposit._default_manager.get(external_id=external_id) if deposit.status != deposit_status: deposit.status = deposit_status deposit.save() assert deposit.status == deposit_status return deposit def create_binary_deposit( - authenticated_client, collection_name: str, sample_archive, - external_id: str, deposit_status: str = DEPOSIT_STATUS_DEPOSITED, - atom_dataset: Mapping[str, bytes] = {}): + authenticated_client, + collection_name: str, + sample_archive, + external_id: str, + deposit_status: str = DEPOSIT_STATUS_DEPOSITED, + atom_dataset: Mapping[str, bytes] = {}, +): """Create a deposit with both metadata and archive set. Then alters its status to `deposit_status`. """ deposit = create_deposit( - authenticated_client, collection_name, sample_archive, - external_id=external_id, deposit_status=DEPOSIT_STATUS_PARTIAL) + authenticated_client, + collection_name, + sample_archive, + external_id=external_id, + deposit_status=DEPOSIT_STATUS_PARTIAL, + ) response = authenticated_client.post( reverse(EDIT_SE_IRI, args=[collection_name, deposit.id]), - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data0'] % deposit.external_id.encode('utf-8'), + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data0"] % deposit.external_id.encode("utf-8"), HTTP_SLUG=deposit.external_id, - HTTP_IN_PROGRESS='true') + HTTP_IN_PROGRESS="true", + ) assert response.status_code == status.HTTP_201_CREATED assert deposit.status == DEPOSIT_STATUS_PARTIAL from swh.deposit.models import Deposit + deposit = Deposit._default_manager.get(pk=deposit.id) if deposit.status != deposit_status: deposit.status = deposit_status deposit.save() assert deposit.status == deposit_status return deposit def deposit_factory(deposit_status=DEPOSIT_STATUS_DEPOSITED): """Build deposit with a specific status """ + @pytest.fixture() - def _deposit(sample_archive, deposit_collection, authenticated_client, - deposit_status=deposit_status): - external_id = 'external-id-%s' % deposit_status + def _deposit( + sample_archive, + deposit_collection, + authenticated_client, + deposit_status=deposit_status, + ): + external_id = "external-id-%s" % deposit_status return create_deposit( - authenticated_client, deposit_collection.name, sample_archive, - external_id=external_id, deposit_status=deposit_status + authenticated_client, + deposit_collection.name, + sample_archive, + external_id=external_id, + deposit_status=deposit_status, ) return _deposit deposited_deposit = deposit_factory() rejected_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_REJECTED) partial_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_PARTIAL) verified_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_VERIFIED) completed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS) failed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_FAILURE) @pytest.fixture def partial_deposit_with_metadata( - sample_archive, deposit_collection, authenticated_client, - atom_dataset): + sample_archive, deposit_collection, authenticated_client, atom_dataset +): """Returns deposit with archive and metadata provided, status 'partial' """ return create_binary_deposit( - authenticated_client, deposit_collection.name, sample_archive, - external_id='external-id-partial', + authenticated_client, + deposit_collection.name, + sample_archive, + external_id="external-id-partial", deposit_status=DEPOSIT_STATUS_PARTIAL, - atom_dataset=atom_dataset + atom_dataset=atom_dataset, ) @pytest.fixture def partial_deposit_only_metadata( - deposit_collection, authenticated_client, - atom_dataset): + deposit_collection, authenticated_client, atom_dataset +): response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), - content_type='application/atom+xml;type=entry', - data=atom_dataset['entry-data1'], - HTTP_SLUG='external-id-partial', - HTTP_IN_PROGRESS=True) + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data1"], + HTTP_SLUG="external-id-partial", + HTTP_IN_PROGRESS=True, + ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(response.content) - deposit_id = response_content['deposit_id'] + deposit_id = response_content["deposit_id"] from swh.deposit.models import Deposit + deposit = Deposit._default_manager.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_PARTIAL return deposit @pytest.fixture def complete_deposit(sample_archive, deposit_collection, authenticated_client): """Returns a completed deposit (load success) """ deposit = create_deposit( - authenticated_client, deposit_collection.name, sample_archive, - external_id='external-id-complete', - deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS + authenticated_client, + deposit_collection.name, + sample_archive, + external_id="external-id-complete", + deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS, ) - _swh_id_context = 'https://hal.archives-ouvertes.fr/hal-01727745' - deposit.swh_id = 'swh:1:dir:42a13fc721c8716ff695d0d62fc851d641f3a12b' - deposit.swh_id_context = '%s;%s' % ( - deposit.swh_id, _swh_id_context) - deposit.swh_anchor_id = \ - 'swh:rev:1:548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10' - deposit.swh_anchor_id_context = '%s;%s' % ( - deposit.swh_anchor_id, _swh_id_context) + _swh_id_context = "https://hal.archives-ouvertes.fr/hal-01727745" + deposit.swh_id = "swh:1:dir:42a13fc721c8716ff695d0d62fc851d641f3a12b" + deposit.swh_id_context = "%s;%s" % (deposit.swh_id, _swh_id_context) + deposit.swh_anchor_id = "swh:rev:1:548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10" + deposit.swh_anchor_id_context = "%s;%s" % (deposit.swh_anchor_id, _swh_id_context) deposit.save() return deposit @pytest.fixture() def tmp_path(tmp_path): return str(tmp_path) # issue with oldstable's pytest version diff --git a/swh/deposit/tests/loader/common.py b/swh/deposit/tests/loader/common.py index d56fc928..510830af 100644 --- a/swh/deposit/tests/loader/common.py +++ b/swh/deposit/tests/loader/common.py @@ -1,124 +1,138 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from typing import Dict from swh.deposit.client import PrivateApiDepositClient from swh.model.hashutil import hash_to_bytes, hash_to_hex CLIENT_TEST_CONFIG = { - 'url': 'http://nowhere:9000/', - 'auth': {}, # no authentication in test scenario + "url": "http://nowhere:9000/", + "auth": {}, # no authentication in test scenario } class SWHDepositTestClient(PrivateApiDepositClient): """Deposit test client to permit overriding the default request client. """ + def __init__(self, client, config): super().__init__(config=config) self.client = client def archive_get(self, archive_update_url, archive_path, log=None): r = self.client.get(archive_update_url) - with open(archive_path, 'wb') as f: + with open(archive_path, "wb") as f: for chunk in r.streaming_content: f.write(chunk) return archive_path def metadata_get(self, metadata_url, log=None): r = self.client.get(metadata_url) - return json.loads(r.content.decode('utf-8')) - - def status_update(self, update_status_url, status, - revision_id=None, directory_id=None, origin_url=None): - payload = {'status': status} + return json.loads(r.content.decode("utf-8")) + + def status_update( + self, + update_status_url, + status, + revision_id=None, + directory_id=None, + origin_url=None, + ): + payload = {"status": status} if revision_id: - payload['revision_id'] = revision_id + payload["revision_id"] = revision_id if directory_id: - payload['directory_id'] = directory_id + payload["directory_id"] = directory_id if origin_url: - payload['origin_url'] = origin_url - self.client.put(update_status_url, - content_type='application/json', - data=json.dumps(payload)) + payload["origin_url"] = origin_url + self.client.put( + update_status_url, content_type="application/json", data=json.dumps(payload) + ) def check(self, check_url): r = self.client.get(check_url) - data = json.loads(r.content.decode('utf-8')) - return data['status'] + data = json.loads(r.content.decode("utf-8")) + return data["status"] def get_stats(storage) -> Dict: """Adaptation utils to unify the stats counters across storage implementation. """ storage.refresh_stat_counters() stats = storage.stat_counters() - keys = ['content', 'directory', 'origin', 'origin_visit', 'person', - 'release', 'revision', 'skipped_content', 'snapshot'] + keys = [ + "content", + "directory", + "origin", + "origin_visit", + "person", + "release", + "revision", + "skipped_content", + "snapshot", + ] return {k: stats.get(k) for k in keys} def decode_target(target): """Test helper to ease readability in test """ if not target: return target - target_type = target['target_type'] + target_type = target["target_type"] - if target_type == 'alias': - decoded_target = target['target'].decode('utf-8') + if target_type == "alias": + decoded_target = target["target"].decode("utf-8") else: - decoded_target = hash_to_hex(target['target']) + decoded_target = hash_to_hex(target["target"]) - return { - 'target': decoded_target, - 'target_type': target_type - } + return {"target": decoded_target, "target_type": target_type} def check_snapshot(expected_snapshot, storage): """Check for snapshot match. Provide the hashes as hexadecimal, the conversion is done within the method. Args: expected_snapshot (dict): full snapshot with hex ids storage (Storage): expected storage """ - expected_snapshot_id = expected_snapshot['id'] - expected_branches = expected_snapshot['branches'] + expected_snapshot_id = expected_snapshot["id"] + expected_branches = expected_snapshot["branches"] snap = storage.snapshot_get(hash_to_bytes(expected_snapshot_id)) if snap is None: # display known snapshots instead if possible - if hasattr(storage, '_snapshots'): # in-mem storage + if hasattr(storage, "_snapshots"): # in-mem storage from pprint import pprint + for snap_id, (_snap, _) in storage._snapshots.items(): snapd = _snap.to_dict() - snapd['id'] = hash_to_hex(snapd['id']) + snapd["id"] = hash_to_hex(snapd["id"]) branches = { - branch.decode('utf-8'): decode_target(target) - for branch, target in snapd['branches'].items() + branch.decode("utf-8"): decode_target(target) + for branch, target in snapd["branches"].items() } - snapd['branches'] = branches + snapd["branches"] = branches pprint(snapd) - raise AssertionError('Snapshot is not found') + raise AssertionError("Snapshot is not found") branches = { - branch.decode('utf-8'): decode_target(target) - for branch, target in snap['branches'].items() + branch.decode("utf-8"): decode_target(target) + for branch, target in snap["branches"].items() } assert expected_branches == branches diff --git a/swh/deposit/tests/loader/conftest.py b/swh/deposit/tests/loader/conftest.py index addb2eb5..b322ca31 100644 --- a/swh/deposit/tests/loader/conftest.py +++ b/swh/deposit/tests/loader/conftest.py @@ -1,61 +1,58 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import re import os import pytest import yaml from functools import partial from swh.core.pytest_plugin import get_response_cb from swh.scheduler.tests.conftest import * # noqa from swh.storage.tests.conftest import * # noqa from swh.deposit.loader.checker import DepositChecker -@pytest.fixture(scope='session') # type: ignore # expected redefinition +@pytest.fixture(scope="session") # type: ignore # expected redefinition def celery_includes(): return [ - 'swh.deposit.loader.tasks', + "swh.deposit.loader.tasks", ] @pytest.fixture def swh_config(tmp_path, swh_storage_postgresql, monkeypatch): storage_config = { - 'url': 'https://deposit.softwareheritage.org/', - 'storage': { - 'cls': 'local', - 'args': { - 'db': swh_storage_postgresql.dsn, - 'objstorage': { - 'cls': 'memory', - 'args': {} - }, + "url": "https://deposit.softwareheritage.org/", + "storage": { + "cls": "local", + "args": { + "db": swh_storage_postgresql.dsn, + "objstorage": {"cls": "memory", "args": {}}, }, }, } - conffile = os.path.join(tmp_path, 'deposit.yml') - with open(conffile, 'w') as f: + conffile = os.path.join(tmp_path, "deposit.yml") + with open(conffile, "w") as f: f.write(yaml.dump(storage_config)) - monkeypatch.setenv('SWH_CONFIG_FILENAME', conffile) + monkeypatch.setenv("SWH_CONFIG_FILENAME", conffile) return conffile @pytest.fixture def deposit_checker(swh_config): return DepositChecker() @pytest.fixture def requests_mock_datadir(datadir, requests_mock_datadir): """Override default behavior to deal with put method """ cb = partial(get_response_cb, datadir=datadir) - requests_mock_datadir.put(re.compile('https://'), body=cb) + requests_mock_datadir.put(re.compile("https://"), body=cb) return requests_mock_datadir diff --git a/swh/deposit/tests/loader/test_checker.py b/swh/deposit/tests/loader/test_checker.py index 86bcb304..cdc4a57a 100644 --- a/swh/deposit/tests/loader/test_checker.py +++ b/swh/deposit/tests/loader/test_checker.py @@ -1,45 +1,38 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import reverse from unittest.mock import patch from swh.deposit.config import PRIVATE_CHECK_DEPOSIT -def test_check_deposit_ready( - swh_config, requests_mock_datadir, deposit_checker): +def test_check_deposit_ready(swh_config, requests_mock_datadir, deposit_checker): """Check on a valid 'deposited' deposit should result in 'verified' """ - deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=['test', 1]) + deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=["test", 1]) actual_result = deposit_checker.check(deposit_check_url=deposit_check_url) - assert actual_result == {'status': 'eventful'} + assert actual_result == {"status": "eventful"} -def test_check_deposit_rejected( - swh_config, requests_mock_datadir, deposit_checker): +def test_check_deposit_rejected(swh_config, requests_mock_datadir, deposit_checker): """Check on invalid 'deposited' deposit should result in 'rejected' """ - deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=[ - 'test', 2 - ]) + deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=["test", 2]) actual_result = deposit_checker.check(deposit_check_url=deposit_check_url) - assert actual_result == {'status': 'failed'} + assert actual_result == {"status": "failed"} -@patch('swh.deposit.client.requests.get') -def test_check_deposit_rejected_exception( - mock_requests, swh_config, deposit_checker): +@patch("swh.deposit.client.requests.get") +def test_check_deposit_rejected_exception(mock_requests, swh_config, deposit_checker): """Check on invalid 'deposited' deposit should result in 'rejected' """ - mock_requests.side_effect = ValueError('simulated problem when checking') - deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=[ - 'test', 3 - ]) + mock_requests.side_effect = ValueError("simulated problem when checking") + deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=["test", 3]) actual_result = deposit_checker.check(deposit_check_url=deposit_check_url) - assert actual_result == {'status': 'failed'} + assert actual_result == {"status": "failed"} diff --git a/swh/deposit/tests/loader/test_client.py b/swh/deposit/tests/loader/test_client.py index 3189be1b..4f099d40 100644 --- a/swh/deposit/tests/loader/test_client.py +++ b/swh/deposit/tests/loader/test_client.py @@ -1,250 +1,247 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import json import pytest import unittest from typing import Any, Callable, Optional from urllib.parse import urlparse from swh.deposit.client import PrivateApiDepositClient -from swh.deposit.config import ( - DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE -) +from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_LOAD_FAILURE CLIENT_TEST_CONFIG = { - 'url': 'https://nowhere.org/', - 'auth': {}, # no authentication in test scenario + "url": "https://nowhere.org/", + "auth": {}, # no authentication in test scenario } def build_expected_path(datadir, base_url: str, api_url: str) -> str: """Build expected path from api to served file """ url = urlparse(base_url) - dirname = '%s_%s' % (url.scheme, url.hostname) - if api_url.endswith('/'): + dirname = "%s_%s" % (url.scheme, url.hostname) + if api_url.endswith("/"): api_url = api_url[:-1] - if api_url.startswith('/'): + if api_url.startswith("/"): api_url = api_url[1:] - suffix_path = api_url.replace('/', '_') + suffix_path = api_url.replace("/", "_") return os.path.join(datadir, dirname, suffix_path) def test_build_expected_path(datadir): - actual_path = build_expected_path( - datadir, 'http://example.org', '/hello/you/') + actual_path = build_expected_path(datadir, "http://example.org", "/hello/you/") - assert actual_path == os.path.join( - datadir, 'http_example.org', 'hello_you') + assert actual_path == os.path.join(datadir, "http_example.org", "hello_you") def read_served_path( - datadir, base_url: str, api_url: str, - convert_fn: Optional[Callable[[str], Any]] = None) -> bytes: + datadir, + base_url: str, + api_url: str, + convert_fn: Optional[Callable[[str], Any]] = None, +) -> bytes: """Read served path """ archive_path = build_expected_path(datadir, base_url, api_url) - with open(archive_path, 'rb') as f: + with open(archive_path, "rb") as f: content = f.read() if convert_fn: - content = convert_fn(content.decode('utf-8')) + content = convert_fn(content.decode("utf-8")) return content def test_read_served_path(datadir): - actual_content = read_served_path( - datadir, 'http://example.org', '/hello/you/') + actual_content = read_served_path(datadir, "http://example.org", "/hello/you/") - assert actual_content == b'hello people\n' + assert actual_content == b"hello people\n" actual_content2 = read_served_path( - datadir, 'http://example.org', '/hello.json', - convert_fn=json.loads) + datadir, "http://example.org", "/hello.json", convert_fn=json.loads + ) - assert actual_content2 == { - 'a': [1, 3] - } + assert actual_content2 == {"a": [1, 3]} # private api to retrieve archive def test_archive_get(tmp_path, datadir, requests_mock_datadir): """Retrieving archive data through private api should stream data """ - api_url = '/1/private/test/1/raw/' + api_url = "/1/private/test/1/raw/" client = PrivateApiDepositClient(config=CLIENT_TEST_CONFIG) - expected_content = read_served_path( - datadir, client.base_url, api_url) + expected_content = read_served_path(datadir, client.base_url, api_url) - archive_path = os.path.join(tmp_path, 'test.archive') + archive_path = os.path.join(tmp_path, "test.archive") archive_path = client.archive_get(api_url, archive_path) assert os.path.exists(archive_path) is True - with open(archive_path, 'rb') as f: + with open(archive_path, "rb") as f: actual_content = f.read() assert actual_content == expected_content - assert client.base_url == CLIENT_TEST_CONFIG['url'] + assert client.base_url == CLIENT_TEST_CONFIG["url"] assert client.auth is None def test_archive_get_auth(tmp_path, datadir, requests_mock_datadir): """Retrieving archive data through private api should stream data """ - api_url = '/1/private/test/1/raw/' + api_url = "/1/private/test/1/raw/" config = CLIENT_TEST_CONFIG.copy() - config['auth'] = { # add authentication setup - 'username': 'user', - 'password': 'pass' + config["auth"] = { # add authentication setup + "username": "user", + "password": "pass", } client = PrivateApiDepositClient(config) - expected_content = read_served_path( - datadir, client.base_url, api_url) + expected_content = read_served_path(datadir, client.base_url, api_url) - archive_path = os.path.join(tmp_path, 'test.archive') + archive_path = os.path.join(tmp_path, "test.archive") archive_path = client.archive_get(api_url, archive_path) assert os.path.exists(archive_path) is True - with open(archive_path, 'rb') as f: + with open(archive_path, "rb") as f: actual_content = f.read() assert actual_content == expected_content - assert client.base_url == CLIENT_TEST_CONFIG['url'] - assert client.auth == ('user', 'pass') + assert client.base_url == CLIENT_TEST_CONFIG["url"] + assert client.auth == ("user", "pass") def test_archive_get_ko(tmp_path, datadir, requests_mock_datadir): """Reading archive can fail for some reasons """ - unknown_api_url = '/1/private/unknown/deposit-id/raw/' + unknown_api_url = "/1/private/unknown/deposit-id/raw/" client = PrivateApiDepositClient(config=CLIENT_TEST_CONFIG) - with pytest.raises(ValueError, match='Problem when retrieving deposit'): - client.archive_get(unknown_api_url, 'some/path') + with pytest.raises(ValueError, match="Problem when retrieving deposit"): + client.archive_get(unknown_api_url, "some/path") # private api read metadata def test_metadata_get(datadir, requests_mock_datadir): """Reading archive should write data in temporary directory """ - api_url = '/1/private/test/1/metadata' + api_url = "/1/private/test/1/metadata" client = PrivateApiDepositClient(config=CLIENT_TEST_CONFIG) actual_metadata = client.metadata_get(api_url) assert isinstance(actual_metadata, str) is False expected_content = read_served_path( - datadir, client.base_url, api_url, - convert_fn=json.loads) + datadir, client.base_url, api_url, convert_fn=json.loads + ) assert actual_metadata == expected_content def test_metadata_get_ko(requests_mock_datadir): """Reading metadata can fail for some reasons """ - unknown_api_url = '/1/private/unknown/deposit-id/metadata/' + unknown_api_url = "/1/private/unknown/deposit-id/metadata/" client = PrivateApiDepositClient(config=CLIENT_TEST_CONFIG) - with pytest.raises(ValueError, match='Problem when retrieving metadata'): + with pytest.raises(ValueError, match="Problem when retrieving metadata"): client.metadata_get(unknown_api_url) # private api check def test_check(requests_mock_datadir): """When check ok, this should return the deposit's status """ - api_url = '/1/private/test/1/check' + api_url = "/1/private/test/1/check" client = PrivateApiDepositClient(config=CLIENT_TEST_CONFIG) r = client.check(api_url) - assert r == 'something' + assert r == "something" def test_check_fails(requests_mock_datadir): """Checking deposit can fail for some reason """ - unknown_api_url = '/1/private/test/10/check' + unknown_api_url = "/1/private/test/10/check" client = PrivateApiDepositClient(config=CLIENT_TEST_CONFIG) - with pytest.raises(ValueError, match='Problem when checking deposit'): + with pytest.raises(ValueError, match="Problem when checking deposit"): client.check(unknown_api_url) # private api update status class FakeRequestClientPut: """Fake Request client dedicated to put request method calls. """ + args = None kwargs = None def put(self, *args, **kwargs): self.args = args self.kwargs = kwargs class PrivateApiDepositClientStatusUpdateTest(unittest.TestCase): def test_status_update(self): """Update status """ _client = FakeRequestClientPut() - deposit_client = PrivateApiDepositClient(config=CLIENT_TEST_CONFIG, - _client=_client) - - deposit_client.status_update('/update/status', - DEPOSIT_STATUS_LOAD_SUCCESS, - revision_id='some-revision-id') - - self.assertEqual(_client.args, - ('https://nowhere.org/update/status', )) - self.assertEqual(_client.kwargs, { - 'json': { - 'status': DEPOSIT_STATUS_LOAD_SUCCESS, - 'revision_id': 'some-revision-id', - } - }) + deposit_client = PrivateApiDepositClient( + config=CLIENT_TEST_CONFIG, _client=_client + ) + + deposit_client.status_update( + "/update/status", + DEPOSIT_STATUS_LOAD_SUCCESS, + revision_id="some-revision-id", + ) + + self.assertEqual(_client.args, ("https://nowhere.org/update/status",)) + self.assertEqual( + _client.kwargs, + { + "json": { + "status": DEPOSIT_STATUS_LOAD_SUCCESS, + "revision_id": "some-revision-id", + } + }, + ) def test_status_update_with_no_revision_id(self): """Reading metadata can fail for some reasons """ _client = FakeRequestClientPut() - deposit_client = PrivateApiDepositClient(config=CLIENT_TEST_CONFIG, - _client=_client) - - deposit_client.status_update('/update/status/fail', - DEPOSIT_STATUS_LOAD_FAILURE) - - self.assertEqual(_client.args, - ('https://nowhere.org/update/status/fail', )) - self.assertEqual(_client.kwargs, { - 'json': { - 'status': DEPOSIT_STATUS_LOAD_FAILURE, - } - }) + deposit_client = PrivateApiDepositClient( + config=CLIENT_TEST_CONFIG, _client=_client + ) + + deposit_client.status_update("/update/status/fail", DEPOSIT_STATUS_LOAD_FAILURE) + + self.assertEqual(_client.args, ("https://nowhere.org/update/status/fail",)) + self.assertEqual( + _client.kwargs, {"json": {"status": DEPOSIT_STATUS_LOAD_FAILURE,}} + ) diff --git a/swh/deposit/tests/loader/test_tasks.py b/swh/deposit/tests/loader/test_tasks.py index 9c9516ba..58433048 100644 --- a/swh/deposit/tests/loader/test_tasks.py +++ b/swh/deposit/tests/loader/test_tasks.py @@ -1,21 +1,21 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from unittest.mock import patch -@patch('swh.deposit.loader.checker.DepositChecker.check') +@patch("swh.deposit.loader.checker.DepositChecker.check") def deposit_check(checker, swh_config, swh_app, celery_session_worker): - checker.return_value = {'status': 'uneventful'} + checker.return_value = {"status": "uneventful"} res = swh_app.send_task( - 'swh.deposit.loader.tasks.ChecksDepositTsk', - args=['check_deposit_url']) + "swh.deposit.loader.tasks.ChecksDepositTsk", args=["check_deposit_url"] + ) assert res res.wait() assert res.successful() - assert res.result == {'status': 'uneventful'} - checker.assert_called_once_with('check_deposit_url') + assert res.result == {"status": "uneventful"} + checker.assert_called_once_with("check_deposit_url") diff --git a/swh/deposit/tests/test_common.py b/swh/deposit/tests/test_common.py index 588a4675..89454aba 100644 --- a/swh/deposit/tests/test_common.py +++ b/swh/deposit/tests/test_common.py @@ -1,26 +1,26 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from swh.deposit.tests.common import check_archive def test_check_archive_helper(): # success for archive_name, archive_name_to_check in [ - ('filename0', 'something/filename0'), - ('archive.zip', 'client_1/archive_noisynoise.zip'), + ("filename0", "something/filename0"), + ("archive.zip", "client_1/archive_noisynoise.zip"), ]: check_archive(archive_name, archive_name_to_check) # failures for archive_name, archive_name_to_check in [ - ('filename0', 'something-filename0'), - ('archive.zip', 'client_1_archive_noisynoise.zip'), - ('reference', 'irrelevant'), + ("filename0", "something-filename0"), + ("archive.zip", "client_1_archive_noisynoise.zip"), + ("reference", "irrelevant"), ]: with pytest.raises(AssertionError): check_archive(archive_name, archive_name_to_check) diff --git a/swh/deposit/tests/test_gunicorn_config.py b/swh/deposit/tests/test_gunicorn_config.py index 209fb783..48fc5d61 100644 --- a/swh/deposit/tests/test_gunicorn_config.py +++ b/swh/deposit/tests/test_gunicorn_config.py @@ -1,57 +1,62 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from unittest.mock import patch import swh.deposit.gunicorn_config as gunicorn_config def test_post_fork_default(): - with patch('sentry_sdk.init') as sentry_sdk_init: + with patch("sentry_sdk.init") as sentry_sdk_init: gunicorn_config.post_fork(None, None) sentry_sdk_init.assert_not_called() def test_post_fork_with_dsn_env(): django_integration = object() # unique object to check for equality - with patch('swh.deposit.gunicorn_config.DjangoIntegration', - new=lambda: django_integration): - with patch('sentry_sdk.init') as sentry_sdk_init: - with patch.dict(os.environ, { - 'SWH_SENTRY_DSN': 'test_dsn', - 'SWH_SENTRY_ENVIRONMENT': 'test', - }): + with patch( + "swh.deposit.gunicorn_config.DjangoIntegration", new=lambda: django_integration + ): + with patch("sentry_sdk.init") as sentry_sdk_init: + with patch.dict( + os.environ, + {"SWH_SENTRY_DSN": "test_dsn", "SWH_SENTRY_ENVIRONMENT": "test",}, + ): gunicorn_config.post_fork(None, None) sentry_sdk_init.assert_called_once_with( - dsn='test_dsn', + dsn="test_dsn", integrations=[django_integration], - environment='test', + environment="test", debug=False, release=None, ) def test_post_fork_debug(): django_integration = object() # unique object to check for equality - with patch('swh.deposit.gunicorn_config.DjangoIntegration', - new=lambda: django_integration): - with patch('sentry_sdk.init') as sentry_sdk_init: - with patch.dict(os.environ, { - 'SWH_SENTRY_DSN': 'test_dsn', - 'SWH_SENTRY_DEBUG': '1', - 'SWH_SENTRY_ENVIRONMENT': 'test', - }): + with patch( + "swh.deposit.gunicorn_config.DjangoIntegration", new=lambda: django_integration + ): + with patch("sentry_sdk.init") as sentry_sdk_init: + with patch.dict( + os.environ, + { + "SWH_SENTRY_DSN": "test_dsn", + "SWH_SENTRY_DEBUG": "1", + "SWH_SENTRY_ENVIRONMENT": "test", + }, + ): gunicorn_config.post_fork(None, None) sentry_sdk_init.assert_called_once_with( - dsn='test_dsn', + dsn="test_dsn", integrations=[django_integration], - environment='test', + environment="test", debug=True, release=None, ) diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py index a7486f7e..644d8f33 100644 --- a/swh/deposit/tests/test_utils.py +++ b/swh/deposit/tests/test_utils.py @@ -1,178 +1,141 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from unittest.mock import patch from swh.deposit import utils def test_merge(): """Calling utils.merge on dicts should merge without losing information """ - d0 = { - 'author': 'someone', - 'license': [['gpl2']], - 'a': 1 - } + d0 = {"author": "someone", "license": [["gpl2"]], "a": 1} d1 = { - 'author': ['author0', {'name': 'author1'}], - 'license': [['gpl3']], - 'b': { - '1': '2' - } + "author": ["author0", {"name": "author1"}], + "license": [["gpl3"]], + "b": {"1": "2"}, } - d2 = { - 'author': map(lambda x: x, ['else']), - 'license': 'mit', - 'b': { - '2': '3', - } - } + d2 = {"author": map(lambda x: x, ["else"]), "license": "mit", "b": {"2": "3",}} d3 = { - 'author': (v for v in ['no one']), + "author": (v for v in ["no one"]), } actual_merge = utils.merge(d0, d1, d2, d3) expected_merge = { - 'a': 1, - 'license': [['gpl2'], ['gpl3'], 'mit'], - 'author': [ - 'someone', 'author0', {'name': 'author1'}, 'else', 'no one'], - 'b': { - '1': '2', - '2': '3', - } + "a": 1, + "license": [["gpl2"], ["gpl3"], "mit"], + "author": ["someone", "author0", {"name": "author1"}, "else", "no one"], + "b": {"1": "2", "2": "3",}, } assert actual_merge == expected_merge def test_merge_2(): - d0 = { - 'license': 'gpl2', - 'runtime': { - 'os': 'unix derivative' - } - } + d0 = {"license": "gpl2", "runtime": {"os": "unix derivative"}} - d1 = { - 'license': 'gpl3', - 'runtime': 'GNU/Linux' - } + d1 = {"license": "gpl3", "runtime": "GNU/Linux"} expected = { - 'license': ['gpl2', 'gpl3'], - 'runtime': [ - { - 'os': 'unix derivative' - }, - 'GNU/Linux' - ], + "license": ["gpl2", "gpl3"], + "runtime": [{"os": "unix derivative"}, "GNU/Linux"], } actual = utils.merge(d0, d1) assert actual == expected def test_merge_edge_cases(): input_dict = { - 'license': ['gpl2', 'gpl3'], - 'runtime': [ - { - 'os': 'unix derivative' - }, - 'GNU/Linux' - ], + "license": ["gpl2", "gpl3"], + "runtime": [{"os": "unix derivative"}, "GNU/Linux"], } # against empty dict actual = utils.merge(input_dict, {}) assert actual == input_dict # against oneself actual = utils.merge(input_dict, input_dict, input_dict) assert actual == input_dict def test_merge_one_dict(): """Merge one dict should result in the same dict value """ - input_and_expected = {'anything': 'really'} + input_and_expected = {"anything": "really"} actual = utils.merge(input_and_expected) assert actual == input_and_expected def test_merge_raise(): """Calling utils.merge with any no dict argument should raise """ - d0 = { - 'author': 'someone', - 'a': 1 - } + d0 = {"author": "someone", "a": 1} - d1 = ['not a dict'] + d1 = ["not a dict"] with pytest.raises(ValueError): utils.merge(d0, d1) with pytest.raises(ValueError): utils.merge(d1, d0) with pytest.raises(ValueError): utils.merge(d1) assert utils.merge(d0) == d0 -@patch('swh.deposit.utils.normalize_timestamp', side_effect=lambda x: x) +@patch("swh.deposit.utils.normalize_timestamp", side_effect=lambda x: x) def test_normalize_date_0(mock_normalize): """When date is a list, choose the first date and normalize it Note: We do not test swh.model.identifiers which is already tested in swh.model """ - actual_date = utils.normalize_date(['2017-10-12', 'date1']) + actual_date = utils.normalize_date(["2017-10-12", "date1"]) - expected_date = '2017-10-12 00:00:00+00:00' + expected_date = "2017-10-12 00:00:00+00:00" assert str(actual_date) == expected_date -@patch('swh.deposit.utils.normalize_timestamp', side_effect=lambda x: x) +@patch("swh.deposit.utils.normalize_timestamp", side_effect=lambda x: x) def test_normalize_date_1(mock_normalize): """Providing a date in a reasonable format, everything is fine Note: We do not test swh.model.identifiers which is already tested in swh.model """ - actual_date = utils.normalize_date('2018-06-11 17:02:02') + actual_date = utils.normalize_date("2018-06-11 17:02:02") - expected_date = '2018-06-11 17:02:02+00:00' + expected_date = "2018-06-11 17:02:02+00:00" assert str(actual_date) == expected_date -@patch('swh.deposit.utils.normalize_timestamp', side_effect=lambda x: x) +@patch("swh.deposit.utils.normalize_timestamp", side_effect=lambda x: x) def test_normalize_date_doing_irrelevant_stuff(mock_normalize): """Providing a date with only the year results in a reasonable date Note: We do not test swh.model.identifiers which is already tested in swh.model """ - actual_date = utils.normalize_date('2017') + actual_date = utils.normalize_date("2017") - expected_date = '2017-01-01 00:00:00+00:00' + expected_date = "2017-01-01 00:00:00+00:00" assert str(actual_date) == expected_date diff --git a/swh/deposit/urls.py b/swh/deposit/urls.py index f0d26d13..384844c3 100644 --- a/swh/deposit/urls.py +++ b/swh/deposit/urls.py @@ -1,30 +1,31 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """SWH's main deposit URL Configuration """ from django.conf.urls import url, include from django.shortcuts import render from django.views.generic.base import RedirectView from rest_framework.urlpatterns import format_suffix_patterns -favicon_view = RedirectView.as_view(url='/static/img/icons/swh-logo-32x32.png', - permanent=True) +favicon_view = RedirectView.as_view( + url="/static/img/icons/swh-logo-32x32.png", permanent=True +) def default_view(req): return render(req, "homepage.html") urlpatterns = [ - url(r'^favicon\.ico$', favicon_view), - url(r'^1/', include('swh.deposit.api.urls')), - url(r'^1/private/', include('swh.deposit.api.private.urls')), - url(r'^$', default_view, name='home'), + url(r"^favicon\.ico$", favicon_view), + url(r"^1/", include("swh.deposit.api.urls")), + url(r"^1/private/", include("swh.deposit.api.private.urls")), + url(r"^$", default_view, name="home"), ] urlpatterns = format_suffix_patterns(urlpatterns) diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py index 86775ac3..ee3711db 100644 --- a/swh/deposit/utils.py +++ b/swh/deposit/utils.py @@ -1,83 +1,83 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import iso8601 from types import GeneratorType from swh.model.identifiers import normalize_timestamp def merge(*dicts): """Given an iterator of dicts, merge them losing no information. Args: *dicts: arguments are all supposed to be dict to merge into one Returns: dict merged without losing information """ + def _extend(existing_val, value): """Given an existing value and a value (as potential lists), merge them together without repetition. """ if isinstance(value, (list, map, GeneratorType)): vals = value else: vals = [value] for v in vals: if v in existing_val: continue existing_val.append(v) return existing_val d = {} for data in dicts: if not isinstance(data, dict): - raise ValueError( - 'dicts is supposed to be a variable arguments of dict') + raise ValueError("dicts is supposed to be a variable arguments of dict") for key, value in data.items(): existing_val = d.get(key) if not existing_val: d[key] = value continue if isinstance(existing_val, (list, map, GeneratorType)): new_val = _extend(existing_val, value) elif isinstance(existing_val, dict): if isinstance(value, dict): new_val = merge(existing_val, value) else: new_val = _extend([existing_val], value) else: new_val = _extend([existing_val], value) d[key] = new_val return d def normalize_date(date): """Normalize date fields as expected by swh workers. If date is a list, elect arbitrarily the first element of that list If date is (then) a string, parse it through dateutil.parser.parse to extract a datetime. Then normalize it through swh.model.identifiers.normalize_timestamp. Returns The swh date object """ if isinstance(date, list): date = date[0] if isinstance(date, str): date = iso8601.parse_date(date) return normalize_timestamp(date) diff --git a/tox.ini b/tox.ini index e81b324f..192894e2 100644 --- a/tox.ini +++ b/tox.ini @@ -1,37 +1,44 @@ [tox] envlist=flake8,mypy,py3-django{1,2} [testenv] extras = testing deps = # the dependency below is needed for now as a workaround for # https://github.com/pypa/pip/issues/6239 swh.core[http] >= 0.0.75 dev: ipdb pytest-cov django1: Django>=1.11,<2 django2: Django>=2,<3 commands = pytest \ !dev: --cov {envsitepackagesdir}/swh/deposit --cov-branch \ {envsitepackagesdir}/swh/deposit \ {posargs} +[testenv:black] +skip_install = true +deps = + black +commands = + {envpython} -m black --check swh + [testenv:flake8] skip_install = true deps = flake8 commands = {envpython} -m flake8 \ --exclude=.tox,.git,__pycache__,.tox,.eggs,*.egg,swh/deposit/migrations [testenv:mypy] setenv = DJANGO_SETTINGS_MODULE=swh.deposit.settings.testing extras = testing deps = mypy django-stubs commands = mypy swh