diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py index c52363d8..3a18be5e 100644 --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -1,109 +1,109 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.deposit import utils from ...config import METADATA_TYPE, APIConfig from ...models import DepositRequest, Deposit from rest_framework.permissions import AllowAny from swh.deposit.api.common import AuthenticatedAPIView from swh.deposit.errors import make_error_dict, NOT_FOUND class DepositReadMixin: """Deposit Read mixin """ def _deposit_requests(self, deposit, request_type): """Given a deposit, yields its associated deposit_request Args: deposit (Deposit): Deposit to list requests for request_type (str): 'archive' or 'metadata' Yields: deposit requests of type request_type associated to the deposit """ if isinstance(deposit, int): deposit = Deposit.objects.get(pk=deposit) deposit_requests = DepositRequest.objects.filter( type=request_type, deposit=deposit ).order_by("id") for deposit_request in deposit_requests: yield deposit_request def _metadata_get(self, deposit): """Given a deposit, aggregate all metadata requests. Args: deposit (Deposit): The deposit instance to extract metadata from. Returns: metadata dict from the deposit. """ metadata = ( m.metadata for m in self._deposit_requests(deposit, request_type=METADATA_TYPE) ) return utils.merge(*metadata) -class SWHPrivateAPIView(APIConfig, AuthenticatedAPIView): +class APIPrivateView(APIConfig, AuthenticatedAPIView): """Mixin intended as private api (so no authentication) based API view (for the private ones). """ authentication_classes = () permission_classes = (AllowAny,) def checks(self, req, collection_name, deposit_id=None): """Override default checks implementation to allow empty collection. """ if deposit_id: try: Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, "Deposit with id %s does not exist" % deposit_id ) headers = self._read_headers(req) checks = self.additional_checks(req, headers, collection_name, deposit_id) if "error" in checks: return checks return {"headers": headers} def get( self, request, collection_name=None, deposit_id=None, format=None, *args, **kwargs, ): return super().get(request, collection_name, deposit_id, format) def put( self, request, collection_name=None, deposit_id=None, format=None, *args, **kwargs, ): return super().put(request, collection_name, deposit_id, format) diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py index 7e654045..dc9a8307 100644 --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -1,230 +1,230 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import re import tarfile import zipfile from itertools import chain from shutil import get_unpack_formats from rest_framework import status from swh.scheduler.utils import create_oneshot_task_dict -from . import DepositReadMixin, SWHPrivateAPIView +from . import DepositReadMixin, APIPrivateView from ..common import APIGet from ...config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED from ...config import ARCHIVE_TYPE from ...models import Deposit MANDATORY_FIELDS_MISSING = "Mandatory fields are missing" ALTERNATE_FIELDS_MISSING = "Mandatory alternate fields are missing" MANDATORY_ARCHIVE_UNREADABLE = ( "At least one of its associated archives is not readable" # noqa ) MANDATORY_ARCHIVE_INVALID = ( "Mandatory archive is invalid (i.e contains only one archive)" # noqa ) MANDATORY_ARCHIVE_UNSUPPORTED = "Mandatory archive type is not supported" MANDATORY_ARCHIVE_MISSING = "Deposit without archive is rejected" ARCHIVE_EXTENSIONS = [ "zip", "tar", "tar.gz", "xz", "tar.xz", "bz2", "tar.bz2", "Z", "tar.Z", "tgz", "7z", ] PATTERN_ARCHIVE_EXTENSION = re.compile(r".*\.(%s)$" % "|".join(ARCHIVE_EXTENSIONS)) def known_archive_format(filename): return any( filename.endswith(t) for t in chain(*(x[1] for x in get_unpack_formats())) ) -class SWHChecksDeposit(SWHPrivateAPIView, APIGet, DepositReadMixin): +class APIChecks(APIPrivateView, APIGet, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ def _check_deposit_archives(self, deposit): """Given a deposit, check each deposit request of type archive. Args: The deposit to check archives for Returns tuple (status, error_detail): True, None if all archives are ok, (False, ) otherwise. """ requests = list(self._deposit_requests(deposit, request_type=ARCHIVE_TYPE)) if len(requests) == 0: # no associated archive is refused return False, {"archive": [{"summary": MANDATORY_ARCHIVE_MISSING,}]} errors = [] for archive_request in requests: check, error_message = self._check_archive(archive_request) if not check: errors.append( {"summary": error_message, "fields": [archive_request.id]} ) if not errors: return True, None return False, {"archive": errors} def _check_archive(self, archive_request): """Check that a deposit associated archive is ok: - readable - supported archive format - valid content: the archive does not contain a single archive file If any of those checks are not ok, return the corresponding failing check. Args: archive_path (DepositRequest): Archive to check Returns: (True, None) if archive is check compliant, (False, ) otherwise. """ archive_path = archive_request.archive.path if not known_archive_format(archive_path): return False, MANDATORY_ARCHIVE_UNSUPPORTED try: if zipfile.is_zipfile(archive_path): with zipfile.ZipFile(archive_path) as f: files = f.namelist() elif tarfile.is_tarfile(archive_path): with tarfile.open(archive_path) as f: files = f.getnames() else: return False, MANDATORY_ARCHIVE_UNSUPPORTED except Exception: return False, MANDATORY_ARCHIVE_UNREADABLE if len(files) > 1: return True, None element = files[0] if PATTERN_ARCHIVE_EXTENSION.match(element): # archive in archive! return False, MANDATORY_ARCHIVE_INVALID return True, None def _check_metadata(self, metadata): """Check to execute on all metadata for mandatory field presence. Args: metadata (dict): Metadata dictionary to check for mandatory fields Returns: tuple (status, error_detail): True, None if metadata are ok (False, ) otherwise. """ required_fields = { "author": False, } alternate_fields = { ("name", "title"): False, # alternate field, at least one # of them must be present } for field, value in metadata.items(): for name in required_fields: if name in field: required_fields[name] = True for possible_names in alternate_fields: for possible_name in possible_names: if possible_name in field: alternate_fields[possible_names] = True continue mandatory_result = [k for k, v in required_fields.items() if not v] optional_result = [" or ".join(k) for k, v in alternate_fields.items() if not v] if mandatory_result == [] and optional_result == []: return True, None detail = [] if mandatory_result != []: detail.append( {"summary": MANDATORY_FIELDS_MISSING, "fields": mandatory_result} ) if optional_result != []: detail.append( {"summary": ALTERNATE_FIELDS_MISSING, "fields": optional_result,} ) return False, {"metadata": detail} def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ deposit = Deposit.objects.get(pk=deposit_id) metadata = self._metadata_get(deposit) problems = {} # will check each deposit's associated request (both of type # archive and metadata) for errors archives_status, error_detail = self._check_deposit_archives(deposit) if not archives_status: problems.update(error_detail) metadata_status, error_detail = self._check_metadata(metadata) if not metadata_status: problems.update(error_detail) deposit_status = archives_status and metadata_status # if any problems arose, the deposit is rejected if not deposit_status: deposit.status = DEPOSIT_STATUS_REJECTED deposit.status_detail = problems response = { "status": deposit.status, "details": deposit.status_detail, } else: deposit.status = DEPOSIT_STATUS_VERIFIED response = { "status": deposit.status, } if not deposit.load_task_id and self.config["checks"]: url = deposit.origin_url task = create_oneshot_task_dict( "load-deposit", url=url, deposit_id=deposit.id, retries_left=3 ) load_task_id = self.scheduler.create_tasks([task])[0]["id"] deposit.load_task_id = load_task_id deposit.save() return status.HTTP_200_OK, json.dumps(response), "application/json" diff --git a/swh/deposit/api/private/deposit_list.py b/swh/deposit/api/private/deposit_list.py index c63a14df..43089a8b 100644 --- a/swh/deposit/api/private/deposit_list.py +++ b/swh/deposit/api/private/deposit_list.py @@ -1,66 +1,66 @@ # Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework.fields import _UnvalidatedField from rest_framework.generics import ListAPIView from rest_framework.pagination import PageNumberPagination from rest_framework import serializers -from . import SWHPrivateAPIView +from . import APIPrivateView from ..converters import convert_status_detail from ...models import Deposit class DefaultPagination(PageNumberPagination): page_size = 100 page_size_query_param = "page_size" class StatusDetailField(_UnvalidatedField): """status_detail field is a dict, we want a simple message instead. So, we reuse the convert_status_detail from deposit_status endpoint to that effect. """ def to_representation(self, value): return convert_status_detail(value) class DepositSerializer(serializers.ModelSerializer): status_detail = StatusDetailField() class Meta: model = Deposit fields = "__all__" -class DepositList(ListAPIView, SWHPrivateAPIView): +class APIList(ListAPIView, APIPrivateView): """Deposit request class to list the deposit's status per page. HTTP verbs supported: GET """ serializer_class = DepositSerializer pagination_class = DefaultPagination def get_queryset(self): params = self.request.query_params exclude_like = params.get("exclude") if exclude_like: # sql injection: A priori, nothing to worry about, django does it for # queryset # https://docs.djangoproject.com/en/3.0/topics/security/#sql-injection-protection # noqa # https://docs.djangoproject.com/en/2.2/topics/security/#sql-injection-protection # noqa deposits = ( Deposit.objects.all() .exclude(external_id__startswith=exclude_like) .order_by("id") ) else: deposits = Deposit.objects.all().order_by("id") return deposits diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py index 6a647871..e6444b0f 100644 --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -1,216 +1,216 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import os import shutil import tempfile from contextlib import contextmanager from django.http import FileResponse from rest_framework import status from swh.core import tarball from swh.model import identifiers from swh.deposit.utils import normalize_date -from . import DepositReadMixin, SWHPrivateAPIView +from . import DepositReadMixin, APIPrivateView from ...config import SWH_PERSON, ARCHIVE_TYPE from ..common import APIGet from ...models import Deposit @contextmanager def aggregate_tarballs(extraction_dir, archive_paths): """Aggregate multiple tarballs into one and returns this new archive's path. Args: extraction_dir (path): Path to use for the tarballs computation archive_paths ([str]): Deposit's archive paths Returns: Tuple (directory to clean up, archive path (aggregated or not)) """ # rebuild one zip archive from (possibly) multiple ones os.makedirs(extraction_dir, 0o755, exist_ok=True) dir_path = tempfile.mkdtemp(prefix="swh.deposit-", dir=extraction_dir) # root folder to build an aggregated tarball aggregated_tarball_rootdir = os.path.join(dir_path, "aggregate") os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True) # uncompress in a temporary location all archives for archive_path in archive_paths: tarball.uncompress(archive_path, aggregated_tarball_rootdir) # Aggregate into one big tarball the multiple smaller ones temp_tarpath = shutil.make_archive( aggregated_tarball_rootdir, "zip", aggregated_tarball_rootdir ) # can already clean up temporary directory shutil.rmtree(aggregated_tarball_rootdir) try: yield temp_tarpath finally: shutil.rmtree(dir_path) -class SWHDepositReadArchives(SWHPrivateAPIView, APIGet, DepositReadMixin): +class APIReadArchives(APIPrivateView, APIGet, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ ADDITIONAL_CONFIG = { "extraction_dir": ("str", "/tmp/swh-deposit/archive/"), } def __init__(self): super().__init__() self.extraction_dir = self.config["extraction_dir"] if not os.path.exists(self.extraction_dir): os.makedirs(self.extraction_dir) def process_get(self, request, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: request (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ archive_paths = [ r.archive.path for r in self._deposit_requests(deposit_id, request_type=ARCHIVE_TYPE) ] with aggregate_tarballs(self.extraction_dir, archive_paths) as path: return FileResponse( open(path, "rb"), status=status.HTTP_200_OK, content_type="application/zip", ) -class SWHDepositReadMetadata(SWHPrivateAPIView, APIGet, DepositReadMixin): +class APIReadMetadata(APIPrivateView, APIGet, DepositReadMixin): """Class in charge of aggregating metadata on a deposit. """ ADDITIONAL_CONFIG = { "provider": ( "dict", { # 'provider_name': '', # those are not set since read from the # 'provider_url': '', # deposit's client "provider_type": "deposit_client", "metadata": {}, }, ), "tool": ( "dict", { "name": "swh-deposit", "version": "0.0.1", "configuration": {"sword_version": "2"}, }, ), } def __init__(self): super().__init__() self.provider = self.config["provider"] self.tool = self.config["tool"] def _normalize_dates(self, deposit, metadata): """Normalize the date to use as a tuple of author date, committer date from the incoming metadata. Args: deposit (Deposit): Deposit model representation metadata (Dict): Metadata dict representation Returns: Tuple of author date, committer date. Those dates are swh normalized. """ commit_date = metadata.get("codemeta:datePublished") author_date = metadata.get("codemeta:dateCreated") if author_date and commit_date: pass elif commit_date: author_date = commit_date elif author_date: commit_date = author_date else: author_date = deposit.complete_date commit_date = deposit.complete_date return (normalize_date(author_date), normalize_date(commit_date)) def metadata_read(self, deposit): """Read and aggregate multiple data on deposit into one unified data dictionary. Args: deposit (Deposit): Deposit concerned by the data aggregation. Returns: Dictionary of data representing the deposit to inject in swh. """ metadata = self._metadata_get(deposit) # Read information metadata data = {"origin": {"type": "deposit", "url": deposit.origin_url,}} # metadata provider self.provider["provider_name"] = deposit.client.last_name self.provider["provider_url"] = deposit.client.provider_url author_date, commit_date = self._normalize_dates(deposit, metadata) if deposit.parent: swh_persistent_id = deposit.parent.swh_id swhid = identifiers.parse_swhid(swh_persistent_id) parent_revision = swhid.object_id parents = [parent_revision] else: parents = [] data["origin_metadata"] = { "provider": self.provider, "tool": self.tool, "metadata": metadata, } data["deposit"] = { "id": deposit.id, "client": deposit.client.username, "collection": deposit.collection.name, "author": SWH_PERSON, "author_date": author_date, "committer": SWH_PERSON, "committer_date": commit_date, "revision_parents": parents, } return data def process_get(self, request, collection_name, deposit_id): deposit = Deposit.objects.get(pk=deposit_id) data = self.metadata_read(deposit) d = {} if data: d = json.dumps(data) return status.HTTP_200_OK, d, "application/json" diff --git a/swh/deposit/api/private/deposit_update_status.py b/swh/deposit/api/private/deposit_update_status.py index 24678a6c..fcf0492b 100644 --- a/swh/deposit/api/private/deposit_update_status.py +++ b/swh/deposit/api/private/deposit_update_status.py @@ -1,105 +1,105 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework.parsers import JSONParser from swh.model.identifiers import DIRECTORY, swhid, REVISION, SNAPSHOT -from . import SWHPrivateAPIView +from . import APIPrivateView from ..common import APIPut from ...errors import make_error_dict, BAD_REQUEST from ...models import Deposit, DEPOSIT_STATUS_DETAIL from ...models import DEPOSIT_STATUS_LOAD_SUCCESS MANDATORY_KEYS = ["origin_url", "revision_id", "directory_id", "snapshot_id"] -class SWHUpdateStatusDeposit(SWHPrivateAPIView, APIPut): +class APIUpdateStatus(APIPrivateView, APIPut): """Deposit request class to update the deposit's status. HTTP verbs supported: PUT """ parser_classes = (JSONParser,) def additional_checks(self, request, headers, collection_name, deposit_id=None): """Enrich existing checks to the default ones. New checks: - Ensure the status is provided - Ensure it exists - no missing information on load success update """ data = request.data status = data.get("status") if not status: msg = "The status key is mandatory with possible values %s" % list( DEPOSIT_STATUS_DETAIL.keys() ) return make_error_dict(BAD_REQUEST, msg) if status not in DEPOSIT_STATUS_DETAIL: msg = "Possible status in %s" % list(DEPOSIT_STATUS_DETAIL.keys()) return make_error_dict(BAD_REQUEST, msg) if status == DEPOSIT_STATUS_LOAD_SUCCESS: missing_keys = [] for key in MANDATORY_KEYS: value = data.get(key) if value is None: missing_keys.append(key) if missing_keys: msg = ( f"Updating deposit status to {status}" f" requires information {','.join(missing_keys)}" ) return make_error_dict(BAD_REQUEST, msg) return {} def process_put(self, request, headers, collection_name, deposit_id): """Update the deposit with status and SWHIDs Returns: 204 No content 400 Bad request if checks fail """ data = request.data deposit = Deposit.objects.get(pk=deposit_id) status = data["status"] deposit.status = status if status == DEPOSIT_STATUS_LOAD_SUCCESS: origin_url = data["origin_url"] directory_id = data["directory_id"] revision_id = data["revision_id"] dir_id = swhid(DIRECTORY, directory_id) snp_id = swhid(SNAPSHOT, data["snapshot_id"]) rev_id = swhid(REVISION, revision_id) deposit.swh_id = dir_id # new id with contextual information deposit.swh_id_context = swhid( DIRECTORY, directory_id, metadata={ "origin": origin_url, "visit": snp_id, "anchor": rev_id, "path": "/", }, ) else: # rejected deposit.status = status deposit.save() return {} diff --git a/swh/deposit/api/private/urls.py b/swh/deposit/api/private/urls.py index 19330bbf..45ffd309 100644 --- a/swh/deposit/api/private/urls.py +++ b/swh/deposit/api/private/urls.py @@ -1,79 +1,79 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.conf.urls import url from ...config import ( PRIVATE_GET_RAW_CONTENT, PRIVATE_PUT_DEPOSIT, PRIVATE_GET_DEPOSIT_METADATA, PRIVATE_CHECK_DEPOSIT, PRIVATE_LIST_DEPOSITS, ) -from .deposit_read import SWHDepositReadArchives -from .deposit_read import SWHDepositReadMetadata -from .deposit_update_status import SWHUpdateStatusDeposit -from .deposit_check import SWHChecksDeposit -from .deposit_list import DepositList +from .deposit_read import APIReadArchives +from .deposit_read import APIReadMetadata +from .deposit_update_status import APIUpdateStatus +from .deposit_check import APIChecks +from .deposit_list import APIList urlpatterns = [ # Retrieve deposit's raw archives' content # -> GET url( r"^(?P[^/]+)/(?P[^/]+)/raw/$", - SWHDepositReadArchives.as_view(), + APIReadArchives.as_view(), name=PRIVATE_GET_RAW_CONTENT, ), # Update deposit's status # -> PUT url( r"^(?P[^/]+)/(?P[^/]+)/update/$", - SWHUpdateStatusDeposit.as_view(), + APIUpdateStatus.as_view(), name=PRIVATE_PUT_DEPOSIT, ), # Retrieve metadata information on a specific deposit # -> GET url( r"^(?P[^/]+)/(?P[^/]+)/meta/$", - SWHDepositReadMetadata.as_view(), + APIReadMetadata.as_view(), name=PRIVATE_GET_DEPOSIT_METADATA, ), # Check archive and metadata information on a specific deposit # -> GET url( r"^(?P[^/]+)/(?P[^/]+)/check/$", - SWHChecksDeposit.as_view(), + APIChecks.as_view(), name=PRIVATE_CHECK_DEPOSIT, ), # Retrieve deposit's raw archives' content # -> GET url( r"^(?P[^/]+)/raw/$", - SWHDepositReadArchives.as_view(), + APIReadArchives.as_view(), name=PRIVATE_GET_RAW_CONTENT + "-nc", ), # Update deposit's status # -> PUT url( r"^(?P[^/]+)/update/$", - SWHUpdateStatusDeposit.as_view(), + APIUpdateStatus.as_view(), name=PRIVATE_PUT_DEPOSIT + "-nc", ), # Retrieve metadata information on a specific deposit # -> GET url( r"^(?P[^/]+)/meta/$", - SWHDepositReadMetadata.as_view(), + APIReadMetadata.as_view(), name=PRIVATE_GET_DEPOSIT_METADATA + "-nc", ), # Check archive and metadata information on a specific deposit # -> GET url( r"^(?P[^/]+)/check/$", - SWHChecksDeposit.as_view(), + APIChecks.as_view(), name=PRIVATE_CHECK_DEPOSIT + "-nc", ), - url(r"^deposits/$", DepositList.as_view(), name=PRIVATE_LIST_DEPOSITS), + url(r"^deposits/$", APIList.as_view(), name=PRIVATE_LIST_DEPOSITS), ] diff --git a/swh/deposit/tests/api/conftest.py b/swh/deposit/tests/api/conftest.py index 614f4400..7f626c1d 100644 --- a/swh/deposit/tests/api/conftest.py +++ b/swh/deposit/tests/api/conftest.py @@ -1,94 +1,94 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import hashlib import pytest from django.urls import reverse from swh.deposit.config import ( DEPOSIT_STATUS_DEPOSITED, COL_IRI, DEPOSIT_STATUS_VERIFIED, ) from swh.deposit.models import Deposit from swh.deposit.parsers import parse_xml -from swh.deposit.api.private.deposit_check import SWHChecksDeposit +from swh.deposit.api.private.deposit_check import APIChecks @pytest.fixture def datadir(request): """Override default datadir to target main test datadir""" return os.path.join(os.path.dirname(str(request.fspath)), "../data") @pytest.fixture def ready_deposit_ok(partial_deposit_with_metadata): """Returns a deposit ready for checks (it will pass the checks). """ deposit = partial_deposit_with_metadata deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return deposit @pytest.fixture def ready_deposit_verified(partial_deposit_with_metadata): """Returns a deposit ready for checks (it will pass the checks). """ deposit = partial_deposit_with_metadata deposit.status = DEPOSIT_STATUS_VERIFIED deposit.save() return deposit @pytest.fixture def ready_deposit_only_metadata(partial_deposit_only_metadata): """Deposit in status ready that will fail the checks (because missing archive). """ deposit = partial_deposit_only_metadata deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return deposit @pytest.fixture def ready_deposit_invalid_archive(authenticated_client, deposit_collection): url = reverse(COL_IRI, args=[deposit_collection.name]) data = b"some data which is clearly not a zip file" md5sum = hashlib.md5(data).hexdigest() # when response = authenticated_client.post( url, content_type="application/zip", # as zip data=data, # + headers CONTENT_LENGTH=len(data), # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG="external-id-invalid", HTTP_CONTENT_MD5=md5sum, HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", ) response_content = parse_xml(response.content) deposit_id = int(response_content["deposit_id"]) deposit = Deposit.objects.get(pk=deposit_id) deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return deposit @pytest.fixture def swh_checks_deposit(): - return SWHChecksDeposit() + return APIChecks()