diff --git a/swh/deposit/api/collection.py b/swh/deposit/api/collection.py index 0efced43..2e0b6d90 100644 --- a/swh/deposit/api/collection.py +++ b/swh/deposit/api/collection.py @@ -1,102 +1,102 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Any, Dict, Optional, Tuple from rest_framework import status -from ..config import EDIT_SE_IRI +from ..config import EDIT_IRI from ..parsers import ( SWHAtomEntryParser, SWHFileUploadTarParser, SWHFileUploadZipParser, SWHMultiPartParser, ) from .common import ACCEPT_ARCHIVE_CONTENT_TYPES, APIPost class CollectionAPI(APIPost): """Deposit request class defining api endpoints for sword deposit. What's known as 'Col-IRI' in the sword specification. HTTP verbs supported: POST """ parser_classes = ( SWHMultiPartParser, SWHFileUploadZipParser, SWHFileUploadTarParser, SWHAtomEntryParser, ) def process_post( self, req, headers: Dict[str, Any], collection_name: str, deposit_id: Optional[int] = None, ) -> Tuple[int, str, Dict[str, Any]]: """Create a first deposit as: - archive deposit (1 zip) - multipart (1 zip + 1 atom entry) - atom entry Args: req (Request): the request holding the information to parse and inject in db collection_name (str): the associated client Returns: An http response (HttpResponse) according to the situation. If everything is ok, a 201 response (created) with a deposit receipt. Otherwise, depending on the upload, the following errors can be returned: - archive deposit: - 400 (bad request) if the request is not providing an external identifier - 403 (forbidden) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or hash provided mismatch the reality of the archive. 
- 415 (unsupported media type) if a wrong media type is provided - multipart deposit: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided - Atom entry deposit: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ assert deposit_id is None if req.content_type in ACCEPT_ARCHIVE_CONTENT_TYPES: data = self._binary_upload( req, headers, collection_name, check_slug_is_present=True ) elif req.content_type.startswith("multipart/"): data = self._multipart_upload( req, headers, collection_name, check_slug_is_present=True ) else: data = self._atom_entry( req, headers, collection_name, check_slug_is_present=True ) - return status.HTTP_201_CREATED, EDIT_SE_IRI, data + return status.HTTP_201_CREATED, EDIT_IRI, data diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py index 123f7e6d..565138fa 100644 --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,1192 +1,1193 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from abc import ABCMeta, abstractmethod import datetime import hashlib import json from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union import attr from django.http import FileResponse, HttpResponse from django.shortcuts import render from django.urls import reverse from django.utils import timezone from rest_framework import status from rest_framework.authentication import BaseAuthentication, BasicAuthentication from rest_framework.permissions import BasePermission, IsAuthenticated from rest_framework.request import Request from rest_framework.views import APIView from swh.deposit.api.checks import check_metadata from swh.deposit.api.converters import convert_status_detail from swh.deposit.models import Deposit from swh.deposit.utils import compute_metadata_context from swh.model import hashutil from swh.model.identifiers import SWHID, ValidationError from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, RawExtrinsicMetadata, ) from swh.scheduler.utils import create_oneshot_task_dict from ..config import ( ARCHIVE_KEY, ARCHIVE_TYPE, CONT_FILE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_PARTIAL, - EDIT_SE_IRI, + EDIT_IRI, EM_IRI, METADATA_KEY, METADATA_TYPE, RAW_METADATA_KEY, + SE_IRI, STATE_IRI, APIConfig, ) from ..errors import ( BAD_REQUEST, CHECKSUM_MISMATCH, ERROR_CONTENT, FORBIDDEN, MAX_UPLOAD_SIZE_EXCEEDED, MEDIATION_NOT_ALLOWED, METHOD_NOT_ALLOWED, NOT_FOUND, PARSING_ERROR, BadRequestError, ParserError, make_error_dict, make_error_response, make_error_response_from_dict, make_missing_slug_error, ) from ..models import DepositClient, DepositCollection, DepositRequest from ..parsers import parse_swh_reference, parse_xml ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"] ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"] class AuthenticatedAPIView(APIView): """Mixin intended as a based API view to enforce the basic authentication check """ authentication_classes: Sequence[Type[BaseAuthentication]] = 
(BasicAuthentication,) permission_classes: Sequence[Type[BasePermission]] = (IsAuthenticated,) class APIBase(APIConfig, AuthenticatedAPIView, metaclass=ABCMeta): """Base deposit request class sharing multiple common behaviors. """ def _read_headers(self, request: Request) -> Dict[str, Any]: """Read and unify the necessary headers from the request (those are not stored in the same location or not properly formatted). Args: request (Request): Input request Returns: Dictionary with the following keys (some associated values may be None): - content-type - content-length - in-progress - content-disposition - packaging - slug - on-behalf-of """ meta = request._request.META content_type = request.content_type content_length = meta.get("CONTENT_LENGTH") if content_length and isinstance(content_length, str): content_length = int(content_length) # final deposit if not provided in_progress = meta.get("HTTP_IN_PROGRESS", False) content_disposition = meta.get("HTTP_CONTENT_DISPOSITION") if isinstance(in_progress, str): in_progress = in_progress.lower() == "true" content_md5sum = meta.get("HTTP_CONTENT_MD5") if content_md5sum: content_md5sum = bytes.fromhex(content_md5sum) packaging = meta.get("HTTP_PACKAGING") slug = meta.get("HTTP_SLUG") on_behalf_of = meta.get("HTTP_ON_BEHALF_OF") metadata_relevant = meta.get("HTTP_METADATA_RELEVANT") swhid = meta.get("HTTP_X_CHECK_SWHID") return { "content-type": content_type, "content-length": content_length, "in-progress": in_progress, "content-disposition": content_disposition, "content-md5sum": content_md5sum, "packaging": packaging, "slug": slug, "on-behalf-of": on_behalf_of, "metadata-relevant": metadata_relevant, "swhid": swhid, } def _compute_md5(self, filehandler) -> bytes: """Compute uploaded file's md5 sum. Args: filehandler (InMemoryUploadedFile): the file to compute the md5 hash Returns: the md5 checksum (str) """ h = hashlib.md5() for chunk in filehandler: h.update(chunk) return h.digest() def _deposit_put( self, request: Request, deposit_id: Optional[int] = None, in_progress: bool = False, external_id: Optional[str] = None, ) -> Deposit: """Save/Update a deposit in db. Args: request: request data deposit_id: deposit identifier in_progress: deposit status external_id: external identifier to associate to the deposit Returns: The Deposit instance saved or updated. 
""" complete_date: Optional[datetime.datetime] = None deposit_parent: Optional[Deposit] = None if in_progress is False: complete_date = timezone.now() status_type = DEPOSIT_STATUS_DEPOSITED else: status_type = DEPOSIT_STATUS_PARTIAL if not deposit_id: try: # find a deposit parent (same external id, status load to success) deposit_parent = ( Deposit.objects.filter( external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS ) .order_by("-id")[0:1] .get() ) except Deposit.DoesNotExist: # then no parent for that deposit, deposit_parent already None pass deposit = Deposit( collection=self._collection, external_id=external_id or "", complete_date=complete_date, status=status_type, client=self._client, parent=deposit_parent, ) else: deposit = Deposit.objects.get(pk=deposit_id) # update metadata deposit.complete_date = complete_date deposit.status = status_type if self.config["checks"]: deposit.save() # needed to have a deposit id scheduler = self.scheduler if deposit.status == DEPOSIT_STATUS_DEPOSITED and not deposit.check_task_id: task = create_oneshot_task_dict( "check-deposit", collection=deposit.collection.name, deposit_id=deposit.id, retries_left=3, ) check_task_id = scheduler.create_tasks([task])[0]["id"] deposit.check_task_id = check_task_id deposit.save() return deposit def _deposit_request_put( self, deposit: Deposit, deposit_request_data: Dict[str, Any], replace_metadata: bool = False, replace_archives: bool = False, ) -> DepositRequest: """Save a deposit request with metadata attached to a deposit. Args: deposit: The deposit concerned by the request deposit_request_data: The dictionary with at most 2 deposit request types (archive, metadata) to associate to the deposit replace_metadata: Flag defining if we add or update existing metadata to the deposit replace_archives: Flag defining if we add or update archives to existing deposit Returns: the DepositRequest object stored in the backend """ if replace_metadata: DepositRequest.objects.filter(deposit=deposit, type=METADATA_TYPE).delete() if replace_archives: DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete() deposit_request = None archive_file = deposit_request_data.get(ARCHIVE_KEY) if archive_file: deposit_request = DepositRequest( type=ARCHIVE_TYPE, deposit=deposit, archive=archive_file ) deposit_request.save() metadata = deposit_request_data.get(METADATA_KEY) if metadata: raw_metadata = deposit_request_data[RAW_METADATA_KEY] deposit_request = DepositRequest( type=METADATA_TYPE, deposit=deposit, metadata=metadata, raw_metadata=raw_metadata.decode("utf-8"), ) deposit_request.save() assert deposit_request is not None return deposit_request def _delete_archives(self, collection_name: str, deposit_id: int) -> Dict: """Delete archive references from the deposit id. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, f"The deposit {deposit_id} does not exist" ) DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete() return {} def _delete_deposit(self, collection_name: str, deposit_id: int) -> Dict: """Delete deposit reference. Args: collection_name: Client's collection deposit_id: The deposit to delete Returns Empty dict when ok. Dict with error key to describe the failure. 
""" try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, f"The deposit {deposit_id} does not exist" ) if deposit.collection.name != collection_name: summary = "Cannot delete a deposit from another collection" description = "Deposit %s does not belong to the collection %s" % ( deposit_id, collection_name, ) return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description ) DepositRequest.objects.filter(deposit=deposit).delete() deposit.delete() return {} def _check_preconditions_on( self, filehandler, md5sum: str, content_length: Optional[int] = None ) -> Optional[Dict]: """Check preconditions on provided file are respected. That is the length and/or the md5sum hash match the file's content. Args: filehandler (InMemoryUploadedFile): The file to check md5sum: md5 hash expected from the file's content content_length: the expected length if provided. Returns: Either none if no error or a dictionary with a key error detailing the problem. """ max_upload_size = self.config["max_upload_size"] if content_length: if content_length > max_upload_size: return make_error_dict( MAX_UPLOAD_SIZE_EXCEEDED, f"Upload size limit exceeded (max {max_upload_size} bytes)." "Please consider sending the archive in multiple steps.", ) length = filehandler.size if length != content_length: return make_error_dict( status.HTTP_412_PRECONDITION_FAILED, "Wrong length" ) if md5sum: _md5sum = self._compute_md5(filehandler) if _md5sum != md5sum: return make_error_dict( CHECKSUM_MISMATCH, "Wrong md5 hash", f"The checksum sent {hashutil.hash_to_hex(md5sum)} and the actual " f"checksum {hashutil.hash_to_hex(_md5sum)} does not match.", ) return None def _binary_upload( self, request: Request, headers: Dict[str, Any], collection_name: str, deposit_id: Optional[int] = None, replace_metadata: bool = False, replace_archives: bool = False, check_slug_is_present: bool = False, ) -> Dict[str, Any]: """Binary upload routine. Other than such a request, a 415 response is returned. Args: request (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. 
check_slug_is_present: Check for the slug header if True and raise if not present Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 413 (request entity too large) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided """ content_length = headers["content-length"] if not content_length: return make_error_dict( BAD_REQUEST, "CONTENT_LENGTH header is mandatory", "For archive deposit, the CONTENT_LENGTH header must be sent.", ) content_disposition = headers["content-disposition"] if not content_disposition: return make_error_dict( BAD_REQUEST, "CONTENT_DISPOSITION header is mandatory", "For archive deposit, the CONTENT_DISPOSITION header must be sent.", ) packaging = headers["packaging"] if packaging and packaging not in ACCEPT_PACKAGINGS: return make_error_dict( BAD_REQUEST, f"Only packaging {ACCEPT_PACKAGINGS} is supported", f"The packaging provided {packaging} is not supported", ) filehandler = request.FILES["file"] precondition_status_response = self._check_preconditions_on( filehandler, headers["content-md5sum"], content_length ) if precondition_status_response: return precondition_status_response slug = headers.get("slug") if check_slug_is_present and not slug: return make_missing_slug_error() # actual storage of data archive_metadata = filehandler deposit = self._deposit_put( request, deposit_id=deposit_id, in_progress=headers["in-progress"], external_id=slug, ) self._deposit_request_put( deposit, {ARCHIVE_KEY: archive_metadata}, replace_metadata=replace_metadata, replace_archives=replace_archives, ) return { "deposit_id": deposit.id, "deposit_date": deposit.reception_date, "status": deposit.status, "archive": filehandler.name, } def _read_metadata(self, metadata_stream) -> Tuple[bytes, Dict[str, Any]]: """Given a metadata stream, reads the metadata and returns both the parsed and the raw metadata. """ raw_metadata = metadata_stream.read() metadata = parse_xml(raw_metadata) return raw_metadata, metadata def _multipart_upload( self, request: Request, headers: Dict[str, Any], collection_name: str, deposit_id: Optional[int] = None, replace_metadata: bool = False, replace_archives: bool = False, check_slug_is_present: bool = False, ) -> Dict: """Multipart upload supported with exactly: - 1 archive (zip) - 1 atom entry Other than such a request, a 415 response is returned. Args: request (Request): the request holding information to parse and inject in db headers: request headers formatted collection_name: the associated client deposit_id: deposit identifier if provided replace_metadata: 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives: 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. 
check_slug_is_present: Check for the slug header if True and raise if not present Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 413 (request entity too large) if the length of the archive exceeds the max size configured - 415 (unsupported media type) if a wrong media type is provided """ slug = headers.get("slug") if check_slug_is_present and not slug: return make_missing_slug_error() content_types_present = set() data: Dict[str, Optional[Any]] = { "application/zip": None, # expected either zip "application/x-tar": None, # or x-tar "application/atom+xml": None, } for key, value in request.FILES.items(): fh = value content_type = fh.content_type if content_type in content_types_present: return make_error_dict( ERROR_CONTENT, "Only 1 application/zip (or application/x-tar) archive " "and 1 atom+xml entry is supported (as per sword2.0 " "specification)", "You provided more than 1 application/(zip|x-tar) " "or more than 1 application/atom+xml content-disposition " "header in the multipart deposit", ) content_types_present.add(content_type) assert content_type is not None data[content_type] = fh if len(content_types_present) != 2: return make_error_dict( ERROR_CONTENT, "You must provide both 1 application/zip (or " "application/x-tar) and 1 atom+xml entry for multipart " "deposit", "You need to provide only 1 application/(zip|x-tar) " "and 1 application/atom+xml content-disposition header " "in the multipart deposit", ) filehandler = data["application/zip"] if not filehandler: filehandler = data["application/x-tar"] precondition_status_response = self._check_preconditions_on( filehandler, headers["content-md5sum"] ) if precondition_status_response: return precondition_status_response try: raw_metadata, metadata = self._read_metadata(data["application/atom+xml"]) except ParserError: return make_error_dict( PARSING_ERROR, "Malformed xml metadata", "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.", ) # actual storage of data deposit = self._deposit_put( request, deposit_id=deposit_id, in_progress=headers["in-progress"], external_id=slug, ) deposit_request_data = { ARCHIVE_KEY: filehandler, METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } self._deposit_request_put( deposit, deposit_request_data, replace_metadata, replace_archives ) assert filehandler is not None return { "deposit_id": deposit.id, "deposit_date": deposit.reception_date, "archive": filehandler.name, "status": deposit.status, } def _store_metadata_deposit( self, deposit: Deposit, swhid_reference: Union[str, SWHID], metadata: Dict, raw_metadata: bytes, deposit_origin: Optional[str] = None, ) -> Tuple[Union[SWHID, str], Union[SWHID, str], Deposit, DepositRequest]: """When all user inputs pass the checks, this associates the raw_metadata to the swhid_reference in the raw extrinsic metadata storage. In case of any issues, a bad request response is returned to the user with the details. 
Checks: - metadata are technically parsable - metadata pass the functional checks - SWHID (if any) is technically valid Args: deposit: Deposit reference swhid_reference: The swhid or the origin to attach metadata information to metadata: Full dict of metadata to check for validity (parsed out of raw_metadata) raw_metadata: The actual raw metadata to send in the storage metadata deposit_origin: Optional deposit origin url to use if any (e.g. deposit update scenario provides one) Raises: BadRequestError in case of incorrect inputs from the deposit client (e.g. functionally invalid metadata, ...) Returns: Tuple of core swhid, swhid context, deposit and deposit request """ metadata_ok, error_details = check_metadata(metadata) if not metadata_ok: assert error_details, "Details should be set when a failure occurs" raise BadRequestError( "Functional metadata checks failure", convert_status_detail(error_details), ) metadata_authority = MetadataAuthority( type=MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit.client.provider_url, metadata={"name": deposit.client.last_name}, ) metadata_fetcher = MetadataFetcher( name=self.tool["name"], version=self.tool["version"], metadata=self.tool["configuration"], ) # replace metadata within the deposit backend deposit_request_data = { METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } # actually add the metadata to the completed deposit deposit_request = self._deposit_request_put(deposit, deposit_request_data) object_type, metadata_context = compute_metadata_context(swhid_reference) if deposit_origin: # metadata deposit update on completed deposit metadata_context["origin"] = deposit_origin swhid_core: Union[str, SWHID] if isinstance(swhid_reference, str): swhid_core = swhid_reference else: swhid_core = attr.evolve(swhid_reference, metadata={}) # store that metadata to the metadata storage metadata_object = RawExtrinsicMetadata( type=object_type, target=swhid_core, # core swhid or origin discovery_date=deposit_request.date, authority=metadata_authority, fetcher=metadata_fetcher, format="sword-v2-atom-codemeta", metadata=raw_metadata, **metadata_context, ) # write to metadata storage self.storage_metadata.metadata_authority_add([metadata_authority]) self.storage_metadata.metadata_fetcher_add([metadata_fetcher]) self.storage_metadata.raw_extrinsic_metadata_add([metadata_object]) return (swhid_core, swhid_reference, deposit, deposit_request) def _atom_entry( self, request: Request, headers: Dict[str, Any], collection_name: str, deposit_id: Optional[int] = None, replace_metadata: bool = False, replace_archives: bool = False, check_slug_is_present: bool = False, ) -> Dict[str, Any]: """Atom entry deposit. Args: request: the request holding information to parse and inject in db headers: request headers formatted collection_name: the associated client deposit_id: deposit identifier if provided replace_metadata: 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives: 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. 
check_slug_is_present: Check for the slug header if True and raise if not present Returns: In the optimal case a dict with the following keys: - deposit_id: deposit id associated to the deposit - deposit_date: date of the deposit - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ try: raw_metadata, metadata = self._read_metadata(request.data) except ParserError: return make_error_dict( BAD_REQUEST, "Malformed xml metadata", "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.", ) if not metadata: return make_error_dict( BAD_REQUEST, "Empty body request is not supported", "Atom entry deposit is supposed to send for metadata. " "If the body is empty, there is no metadata.", ) # Determine if we are in the metadata-only deposit case try: swhid = parse_swh_reference(metadata) except ValidationError as e: return make_error_dict(PARSING_ERROR, "Invalid SWHID reference", str(e),) if swhid is not None: external_id = metadata.get("external_identifier", headers.get("slug")) else: slug = headers.get("slug") if check_slug_is_present and not slug: return make_missing_slug_error() external_id = metadata.get("external_identifier", slug) deposit = self._deposit_put( request, deposit_id=deposit_id, in_progress=headers["in-progress"], external_id=external_id, ) if swhid is not None: try: swhid, swhid_ref, depo, depo_request = self._store_metadata_deposit( deposit, swhid, metadata, raw_metadata ) except BadRequestError as bad_request_error: return bad_request_error.to_dict() deposit.status = DEPOSIT_STATUS_LOAD_SUCCESS if isinstance(swhid_ref, SWHID): deposit.swhid = str(swhid) deposit.swhid_context = str(swhid_ref) deposit.complete_date = depo_request.date deposit.reception_date = depo_request.date deposit.save() return { "deposit_id": deposit.id, "deposit_date": depo_request.date, "status": deposit.status, "archive": None, } self._deposit_request_put( deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, replace_metadata, replace_archives, ) return { "deposit_id": deposit.id, "deposit_date": deposit.reception_date, "archive": None, "status": deposit.status, } def _empty_post( self, request: Request, headers: Dict, collection_name: str, deposit_id: int ) -> Dict[str, Any]: """Empty post to finalize an empty deposit. Args: request: the request holding information to parse and inject in db headers: request headers formatted collection_name: the associated client deposit_id: deposit identifier Returns: Dictionary of result with the deposit's id, the date it was completed and no archive. """ deposit = Deposit.objects.get(pk=deposit_id) deposit.complete_date = timezone.now() deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return { "deposit_id": deposit_id, "deposit_date": deposit.complete_date, "status": deposit.status, "archive": None, } def _make_iris( self, request: Request, collection_name: str, deposit_id: int ) -> Dict[str, Any]: """Define the IRI endpoints Args: request (Request): The initial request collection_name (str): client/collection's name deposit_id (id): Deposit identifier Returns: Dictionary of keys with the iris' urls. 
""" args = [collection_name, deposit_id] return { iri: request.build_absolute_uri(reverse(iri, args=args)) - for iri in [EM_IRI, EDIT_SE_IRI, CONT_FILE_IRI, STATE_IRI] + for iri in [EM_IRI, EDIT_IRI, CONT_FILE_IRI, SE_IRI, STATE_IRI] } def additional_checks( self, request: Request, headers: Dict[str, Any], collection_name: str, deposit_id: Optional[int] = None, ) -> Dict[str, Any]: """Permit the child class to enrich additional checks. Returns: dict with 'error' detailing the problem. """ return {} def checks( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> Dict[str, Any]: try: self._collection = DepositCollection.objects.get(name=collection_name) except DepositCollection.DoesNotExist: return make_error_dict( NOT_FOUND, f"Unknown collection name {collection_name}" ) assert self._collection is not None username = request.user.username if username: # unauthenticated request can have the username empty try: self._client: DepositClient = DepositClient.objects.get( # type: ignore username=username ) except DepositClient.DoesNotExist: return make_error_dict(NOT_FOUND, f"Unknown client name {username}") collection_id = self._collection.id collections = self._client.collections assert collections is not None if collection_id not in collections: return make_error_dict( FORBIDDEN, f"Client {username} cannot access collection {collection_name}", ) headers = self._read_headers(request) if deposit_id: try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, f"Deposit with id {deposit_id} does not exist" ) assert deposit is not None checks = self.restrict_access(request, headers, deposit) if checks: return checks if headers["on-behalf-of"]: return make_error_dict(MEDIATION_NOT_ALLOWED, "Mediation is not supported.") checks = self.additional_checks(request, headers, collection_name, deposit_id) if "error" in checks: return checks return {"headers": headers} def restrict_access( self, request: Request, headers: Dict, deposit: Deposit ) -> Dict[str, Any]: """Allow modifications on deposit with status 'partial' only, reject the rest. """ if request.method != "GET" and deposit.status != DEPOSIT_STATUS_PARTIAL: summary = "You can only act on deposit with status '%s'" % ( DEPOSIT_STATUS_PARTIAL, ) description = f"This deposit has status '{deposit.status}'" return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description ) return {} def _basic_not_allowed_method(self, request: Request, method: str): return make_error_response( request, METHOD_NOT_ALLOWED, f"{method} method is not supported on this endpoint", ) def get( self, request: Request, collection_name: str, deposit_id: int ) -> Union[HttpResponse, FileResponse]: return self._basic_not_allowed_method(request, "GET") def post( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: return self._basic_not_allowed_method(request, "POST") def put( self, request: Request, collection_name: str, deposit_id: int ) -> HttpResponse: return self._basic_not_allowed_method(request, "PUT") def delete( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: return self._basic_not_allowed_method(request, "DELETE") class APIGet(APIBase, metaclass=ABCMeta): """Mixin for class to support GET method. """ def get( self, request: Request, collection_name: str, deposit_id: int ) -> Union[HttpResponse, FileResponse]: """Endpoint to create/add resources to deposit. 
Returns: 200 response when no error during routine occurred 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(request, collection_name, deposit_id) if "error" in checks: return make_error_response_from_dict(request, checks["error"]) r = self.process_get(request, collection_name, deposit_id) status, content, content_type = r if content_type == "swh/generator": with content as path: return FileResponse( open(path, "rb"), status=status, content_type="application/zip" ) if content_type == "application/json": return HttpResponse( json.dumps(content), status=status, content_type=content_type ) return HttpResponse(content, status=status, content_type=content_type) @abstractmethod def process_get( self, request: Request, collection_name: str, deposit_id: int ) -> Tuple[int, Any, str]: """Routine to deal with the deposit's get processing. Returns: Tuple status, stream of content, content-type """ pass class APIPost(APIBase, metaclass=ABCMeta): """Mixin for class to support POST method. """ def post( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: """Endpoint to create/add resources to deposit. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(request, collection_name, deposit_id) if "error" in checks: return make_error_response_from_dict(request, checks["error"]) headers = checks["headers"] _status, _iri_key, data = self.process_post( request, headers, collection_name, deposit_id ) error = data.get("error") if error: return make_error_response_from_dict(request, error) data["packagings"] = ACCEPT_PACKAGINGS iris = self._make_iris(request, collection_name, data["deposit_id"]) data.update(iris) response = render( request, "deposit/deposit_receipt.xml", context=data, content_type="application/xml", status=_status, ) response._headers["location"] = "Location", data[_iri_key] # type: ignore return response @abstractmethod def process_post( self, request, headers: Dict, collection_name: str, deposit_id: Optional[int] = None, ) -> Tuple[int, str, Dict]: """Routine to deal with the deposit's processing. Returns Tuple of: - response status code (200, 201, etc...) - - key iri (EM_IRI, EDIT_SE_IRI, etc...) + - key iri (EM_IRI, EDIT_IRI, etc...) - dictionary of the processing result """ pass class APIPut(APIBase, metaclass=ABCMeta): """Mixin for class to support PUT method. """ def put( self, request: Request, collection_name: str, deposit_id: int ) -> HttpResponse: """Endpoint to update deposit resources. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(request, collection_name, deposit_id) if "error" in checks: return make_error_response_from_dict(request, checks["error"]) headers = checks["headers"] data = self.process_put(request, headers, collection_name, deposit_id) error = data.get("error") if error: return make_error_response_from_dict(request, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_put( self, request: Request, headers: Dict, collection_name: str, deposit_id: int ) -> Dict[str, Any]: """Routine to deal with updating a deposit in some way. 
Returns dictionary of the processing result """ pass class APIDelete(APIBase, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def delete( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: """Endpoint to delete some deposit's resources (archives, deposit). Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(request, collection_name, deposit_id) if "error" in checks: return make_error_response_from_dict(request, checks["error"]) assert deposit_id is not None data = self.process_delete(request, collection_name, deposit_id) error = data.get("error") if error: return make_error_response_from_dict(request, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_delete( self, request: Request, collection_name: str, deposit_id: int ) -> Dict: """Routine to delete a resource. This is mostly not allowed except for the EM_IRI (cf. .api.deposit_update.APIUpdateArchive) """ return {} diff --git a/swh/deposit/api/edit.py b/swh/deposit/api/edit.py index a05ddfe1..99314cf5 100644 --- a/swh/deposit/api/edit.py +++ b/swh/deposit/api/edit.py @@ -1,202 +1,148 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict -from rest_framework import status from rest_framework.request import Request from swh.deposit.models import Deposit from swh.model.identifiers import parse_swhid -from ..config import ( - DEPOSIT_STATUS_LOAD_SUCCESS, - EDIT_SE_IRI, - EM_IRI, -) +from ..config import DEPOSIT_STATUS_LOAD_SUCCESS from ..errors import BAD_REQUEST, BadRequestError, ParserError, make_error_dict from ..parsers import SWHAtomEntryParser, SWHMultiPartParser -from .common import APIDelete, APIPost, APIPut +from .common import APIDelete, APIPut -class EditAPI(APIPost, APIPut, APIDelete): +class EditAPI(APIPut, APIDelete): """Deposit request class defining api endpoints for sword deposit. - What's known as 'Edit-IRI' and 'SE-IRI' in the sword specification. + What's known as 'Edit-IRI' in the sword specification. - HTTP verbs supported: POST (SE IRI), PUT (Edit IRI), DELETE + HTTP verbs supported: PUT, DELETE """ parser_classes = (SWHMultiPartParser, SWHAtomEntryParser) def restrict_access( self, request: Request, headers: Dict, deposit: Deposit ) -> Dict[str, Any]: """Relax restriction access to allow metadata update on deposit with status "done" when a swhid is provided. """ if ( request.method == "PUT" and headers["swhid"] is not None and deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS ): # Allow metadata update on deposit with status "done" when swhid provided return {} # otherwise, let the standard access restriction check occur return super().restrict_access(request, headers, deposit) def process_put( self, request, headers: Dict, collection_name: str, deposit_id: int ) -> Dict[str, Any]: """This allows the following scenarios: - multipart: replace all the deposit (status partial) metadata and archive with the provided ones. - atom: replace all the deposit (status partial) metadata with the provided ones. 
- with swhid, atom: Add new metatada to deposit (status done) with provided ones and push such metadata to the metadata storage directly. source: - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_metadata - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_multipart Raises: 400 if any of the following occur: - the swhid provided and the deposit swhid do not match - the provided metadata xml file is malformed - the provided xml atom entry is empty - the provided swhid does not exist in the archive Returns: 204 No content """ # noqa swhid = headers.get("swhid") if swhid is None: if request.content_type.startswith("multipart/"): return self._multipart_upload( request, headers, collection_name, deposit_id=deposit_id, replace_archives=True, replace_metadata=True, ) # standard metadata update (replace all metadata already provided to the # deposit by the new ones) return self._atom_entry( request, headers, collection_name, deposit_id=deposit_id, replace_metadata=True, ) # Update metadata on a deposit already ingested # Write to the metadata storage (and the deposit backend) # no ingestion triggered deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS if swhid != deposit.swhid: return make_error_dict( BAD_REQUEST, f"Mismatched provided SWHID {swhid} with deposit's {deposit.swhid}.", "The provided SWHID does not match the deposit to update. " "Please ensure you send the correct deposit SWHID.", ) try: raw_metadata, metadata = self._read_metadata(request.data) except ParserError: return make_error_dict( BAD_REQUEST, "Malformed xml metadata", "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.", ) if not metadata: return make_error_dict( BAD_REQUEST, "Empty body request is not supported", "Atom entry deposit is supposed to send for metadata. " "If the body is empty, there is no metadata.", ) try: _, _, deposit, deposit_request = self._store_metadata_deposit( deposit, parse_swhid(swhid), metadata, raw_metadata, deposit.origin_url, ) except BadRequestError as bad_request_error: return bad_request_error.to_dict() return { "deposit_id": deposit.id, "deposit_date": deposit_request.date, "status": deposit.status, "archive": None, } - def process_post( - self, - request, - headers: Dict, - collection_name: str, - deposit_id: Optional[int] = None, - ) -> Tuple[int, str, Dict]: - """Add new metadata/archive to existing deposit. - - This allows the following scenarios to occur: - - - multipart: Add new metadata and archive to a deposit in status partial with - the provided ones. - - - empty atom: Allows to finalize a deposit in status partial (transition to - deposited). - - source: - - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_metadata - - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_multipart - - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#continueddeposit_complete - - Returns: - In optimal case for a multipart and atom-entry update, a - 201 Created response. The body response will hold a - deposit. And the response headers will contain an entry - 'Location' with the EM-IRI. - - For the empty post case, this returns a 200. 
- - """ # noqa - assert deposit_id is not None - if request.content_type.startswith("multipart/"): - data = self._multipart_upload( - request, headers, collection_name, deposit_id=deposit_id - ) - return (status.HTTP_201_CREATED, EM_IRI, data) - - content_length = headers["content-length"] or 0 - if content_length == 0 and headers["in-progress"] is False: - # check for final empty post - data = self._empty_post(request, headers, collection_name, deposit_id) - return (status.HTTP_200_OK, EDIT_SE_IRI, data) - - data = self._atom_entry( - request, headers, collection_name, deposit_id=deposit_id - ) - return (status.HTTP_201_CREATED, EM_IRI, data) - def process_delete(self, req, collection_name: str, deposit_id: int) -> Dict: """Delete the container (deposit). source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_deleteconteiner # noqa """ return self._delete_deposit(collection_name, deposit_id) diff --git a/swh/deposit/api/sword_edit.py b/swh/deposit/api/sword_edit.py new file mode 100644 index 00000000..d6c38868 --- /dev/null +++ b/swh/deposit/api/sword_edit.py @@ -0,0 +1,82 @@ +# Copyright (C) 2017-2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from typing import Dict, Optional, Tuple + +from rest_framework import status + +from swh.storage import get_storage +from swh.storage.interface import StorageInterface + +from ..config import EDIT_IRI, EM_IRI +from ..parsers import SWHAtomEntryParser, SWHMultiPartParser +from .common import APIPost + + +class SwordEditAPI(APIPost): + """Deposit request class defining api endpoints for sword deposit. + + What's known as 'SE-IRI' in the sword specification. + + HTTP verbs supported: POST + + """ + + parser_classes = (SWHMultiPartParser, SWHAtomEntryParser) + + def __init__(self): + super().__init__() + self.storage_metadata: StorageInterface = get_storage( + **self.config["storage_metadata"] + ) + + def process_post( + self, + request, + headers: Dict, + collection_name: str, + deposit_id: Optional[int] = None, + ) -> Tuple[int, str, Dict]: + """Add new metadata/archive to existing deposit. + + This allows the following scenarios to occur: + + - multipart: Add new metadata and archive to a deposit in status partial with + the provided ones. + + - empty atom: Allows to finalize a deposit in status partial (transition to + deposited). + + source: + - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_metadata + - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_multipart + - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#continueddeposit_complete + + Returns: + In optimal case for a multipart and atom-entry update, a + 201 Created response. The body response will hold a + deposit. And the response headers will contain an entry + 'Location' with the EM-IRI. + + For the empty post case, this returns a 200. 
+ + """ # noqa + assert deposit_id is not None + if request.content_type.startswith("multipart/"): + data = self._multipart_upload( + request, headers, collection_name, deposit_id=deposit_id + ) + return (status.HTTP_201_CREATED, EM_IRI, data) + + content_length = headers["content-length"] or 0 + if content_length == 0 and headers["in-progress"] is False: + # check for final empty post + data = self._empty_post(request, headers, collection_name, deposit_id) + return (status.HTTP_200_OK, EDIT_IRI, data) + + data = self._atom_entry( + request, headers, collection_name, deposit_id=deposit_id + ) + return (status.HTTP_201_CREATED, EM_IRI, data) diff --git a/swh/deposit/api/urls.py b/swh/deposit/api/urls.py index 9c90cbff..4287baa7 100644 --- a/swh/deposit/api/urls.py +++ b/swh/deposit/api/urls.py @@ -1,69 +1,76 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """SWH's deposit api URL Configuration """ from django.conf.urls import url from django.shortcuts import render -from ..config import COL_IRI, CONT_FILE_IRI, EDIT_SE_IRI, EM_IRI, SD_IRI, STATE_IRI +from ..config import COL_IRI, CONT_FILE_IRI, EDIT_IRI, EM_IRI, SD_IRI, SE_IRI, STATE_IRI from .collection import CollectionAPI from .content import ContentAPI from .edit import EditAPI from .edit_media import EditMediaAPI from .service_document import ServiceDocumentAPI from .state import StateAPI +from .sword_edit import SwordEditAPI def api_view(req): return render(req, "api.html") # PUBLIC API urlpatterns = [ # simple view on the api url(r"^$", api_view, name="api"), # SD IRI - Service Document IRI # -> GET url(r"^servicedocument/", ServiceDocumentAPI.as_view(), name=SD_IRI), # Col-IRI - Collection IRI # -> POST url(r"^(?P[^/]+)/$", CollectionAPI.as_view(), name=COL_IRI), # EM IRI - Atom Edit Media IRI (update archive IRI) # -> PUT (update-in-place existing archive) # -> POST (add new archive) url( r"^(?P[^/]+)/(?P[^/]+)/media/$", EditMediaAPI.as_view(), name=EM_IRI, ), # Edit IRI - Atom Entry Edit IRI (update metadata IRI) - # SE IRI - Sword Edit IRI ;; possibly same as Edit IRI # -> PUT (update in place) + # -> DELETE (delete container) + url( + r"^(?P[^/]+)/(?P[^/]+)/atom/$", + EditAPI.as_view(), + name=EDIT_IRI, + ), + # SE IRI - Sword Edit IRI ;; possibly same as Edit IRI # -> POST (add new metadata) url( r"^(?P[^/]+)/(?P[^/]+)/metadata/$", - EditAPI.as_view(), - name=EDIT_SE_IRI, + SwordEditAPI.as_view(), + name=SE_IRI, ), # State IRI # -> GET url( r"^(?P[^/]+)/(?P[^/]+)/status/$", StateAPI.as_view(), name=STATE_IRI, ), # Cont-IRI # -> GET url( r"^(?P[^/]+)/(?P[^/]+)/content/$", ContentAPI.as_view(), name=CONT_FILE_IRI, ), # specification is not clear about # File-IRI, we assume it's the same as # the Cont-IRI one ] diff --git a/swh/deposit/config.py b/swh/deposit/config.py index ba6c0939..d810bff7 100644 --- a/swh/deposit/config.py +++ b/swh/deposit/config.py @@ -1,108 +1,109 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Any, Dict from swh.core import config from swh.deposit import __version__ from swh.scheduler import get_scheduler from swh.scheduler.interface import SchedulerInterface from swh.storage 
import get_storage from swh.storage.interface import StorageInterface # IRIs (Internationalized Resource identifier) sword 2.0 specified -EDIT_SE_IRI = "edit_se_iri" +EDIT_IRI = "edit_iri" +SE_IRI = "se_iri" EM_IRI = "em_iri" CONT_FILE_IRI = "cont_file_iri" SD_IRI = "servicedocument" COL_IRI = "upload" STATE_IRI = "state_iri" PRIVATE_GET_RAW_CONTENT = "private-download" PRIVATE_CHECK_DEPOSIT = "check-deposit" PRIVATE_PUT_DEPOSIT = "private-update" PRIVATE_GET_DEPOSIT_METADATA = "private-read" PRIVATE_LIST_DEPOSITS = "private-deposit-list" ARCHIVE_KEY = "archive" METADATA_KEY = "metadata" RAW_METADATA_KEY = "raw-metadata" ARCHIVE_TYPE = "archive" METADATA_TYPE = "metadata" AUTHORIZED_PLATFORMS = ["development", "production", "testing"] DEPOSIT_STATUS_REJECTED = "rejected" DEPOSIT_STATUS_PARTIAL = "partial" DEPOSIT_STATUS_DEPOSITED = "deposited" DEPOSIT_STATUS_VERIFIED = "verified" DEPOSIT_STATUS_LOAD_SUCCESS = "done" DEPOSIT_STATUS_LOAD_FAILURE = "failed" # Revision author for deposit SWH_PERSON = { "name": "Software Heritage", "fullname": "Software Heritage", "email": "robot@softwareheritage.org", } DEFAULT_CONFIG = { "max_upload_size": 209715200, "checks": True, } def setup_django_for(platform=None, config_file=None): """Setup function for command line tools (swh.deposit.create_user) to initialize the needed db access. Note: Do not import any django related module prior to this function call. Otherwise, this will raise an django.core.exceptions.ImproperlyConfigured error message. Args: platform (str): the platform the scheduling is running config_file (str): Extra configuration file (typically for the production platform) Raises: ValueError in case of wrong platform inputs. """ if platform is not None: if platform not in AUTHORIZED_PLATFORMS: raise ValueError("Platform should be one of %s" % AUTHORIZED_PLATFORMS) if "DJANGO_SETTINGS_MODULE" not in os.environ: os.environ["DJANGO_SETTINGS_MODULE"] = "swh.deposit.settings.%s" % platform if config_file: os.environ.setdefault("SWH_CONFIG_FILENAME", config_file) import django django.setup() class APIConfig: """API Configuration centralized class. This loads explicitly the configuration file out of the SWH_CONFIG_FILENAME environment variable. 
""" def __init__(self): self.config: Dict[str, Any] = config.load_from_envvar(DEFAULT_CONFIG) self.scheduler: SchedulerInterface = get_scheduler(**self.config["scheduler"]) self.tool = { "name": "swh-deposit", "version": __version__, "configuration": {"sword_version": "2"}, } self.storage_metadata: StorageInterface = get_storage( **self.config["storage_metadata"] ) diff --git a/swh/deposit/templates/deposit/deposit_receipt.xml b/swh/deposit/templates/deposit/deposit_receipt.xml index 64cf2c09..5d26e7e7 100644 --- a/swh/deposit/templates/deposit/deposit_receipt.xml +++ b/swh/deposit/templates/deposit/deposit_receipt.xml @@ -1,19 +1,19 @@ {{ deposit_id }} {{ deposit_date }} {{ archive }} {{ status }} - + - + {% for packaging in packagings %}{{ packaging }}{% endfor %} diff --git a/swh/deposit/tests/api/test_collection.py b/swh/deposit/tests/api/test_collection.py index 00c38d91..4a135f5d 100644 --- a/swh/deposit/tests/api/test_collection.py +++ b/swh/deposit/tests/api/test_collection.py @@ -1,194 +1,194 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib from io import BytesIO from django.urls import reverse from rest_framework import status from swh.deposit.config import ( COL_IRI, DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_REJECTED, - EDIT_SE_IRI, + SE_IRI, ) from swh.deposit.models import Deposit from swh.deposit.parsers import parse_xml def test_deposit_post_will_fail_with_401(client): """Without authentication, endpoint refuses access with 401 response """ url = reverse(COL_IRI, args=["hal"]) response = client.post(url) assert response.status_code == status.HTTP_401_UNAUTHORIZED def test_access_to_another_user_collection_is_forbidden( authenticated_client, deposit_another_collection, deposit_user ): """Access to another user collection should return a 403 """ coll2 = deposit_another_collection url = reverse(COL_IRI, args=[coll2.name]) response = authenticated_client.post(url) assert response.status_code == status.HTTP_403_FORBIDDEN msg = "Client %s cannot access collection %s" % (deposit_user.username, coll2.name,) assert msg in response.content.decode("utf-8") def test_delete_on_col_iri_not_supported(authenticated_client, deposit_collection): """Delete on col iri should return a 405 response """ url = reverse(COL_IRI, args=[deposit_collection.name]) response = authenticated_client.delete(url) assert response.status_code == status.HTTP_405_METHOD_NOT_ALLOWED assert "DELETE method is not supported on this endpoint" in response.content.decode( "utf-8" ) def create_deposit_with_rejection_status(authenticated_client, deposit_collection): url = reverse(COL_IRI, args=[deposit_collection.name]) data = b"some data which is clearly not a zip file" md5sum = hashlib.md5(data).hexdigest() external_id = "some-external-id-1" # when response = authenticated_client.post( url, content_type="application/zip", # as zip data=data, # + headers CONTENT_LENGTH=len(data), # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, HTTP_CONTENT_MD5=md5sum, HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) actual_state = 
response_content["deposit_status"] assert actual_state == DEPOSIT_STATUS_REJECTED def test_act_on_deposit_rejected_is_not_permitted( authenticated_client, deposit_collection, rejected_deposit, atom_dataset ): deposit = rejected_deposit response = authenticated_client.post( - reverse(EDIT_SE_IRI, args=[deposit.collection.name, deposit.id]), + reverse(SE_IRI, args=[deposit.collection.name, deposit.id]), content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data1"], HTTP_SLUG=deposit.external_id, ) assert response.status_code == status.HTTP_400_BAD_REQUEST msg = "You can only act on deposit with status '%s'" % ( DEPOSIT_STATUS_PARTIAL, ) assert msg in response.content.decode("utf-8") def test_add_deposit_when_partial_makes_new_deposit( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """Posting deposit on collection when previous is partial makes new deposit """ deposit = partial_deposit assert deposit.status == DEPOSIT_STATUS_PARTIAL # adding a new deposit with the same external id response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data0"] % deposit.external_id, HTTP_SLUG=deposit.external_id, ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content["deposit_id"] assert deposit_id != deposit.id # new deposit new_deposit = Deposit.objects.get(pk=deposit_id) assert new_deposit != deposit assert new_deposit.parent is None def test_add_deposit_when_failed_makes_new_deposit_with_no_parent( authenticated_client, deposit_collection, failed_deposit, atom_dataset ): """Posting deposit on collection when deposit done makes new deposit with parent """ deposit = failed_deposit assert deposit.status == DEPOSIT_STATUS_LOAD_FAILURE # adding a new deposit with the same external id as a completed deposit # creates the parenting chain response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data0"] % deposit.external_id, HTTP_SLUG=deposit.external_id, ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content["deposit_id"] assert deposit_id != deposit.id new_deposit = Deposit.objects.get(pk=deposit_id) assert new_deposit != deposit assert new_deposit.parent is None def test_add_deposit_when_done_makes_new_deposit_with_parent_old_one( authenticated_client, deposit_collection, completed_deposit, atom_dataset ): """Posting deposit on collection when deposit done makes new deposit with parent """ # given multiple deposit already loaded deposit = completed_deposit assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS # adding a new deposit with the same external id as a completed deposit # creates the parenting chain response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data0"] % deposit.external_id, HTTP_SLUG=deposit.external_id, ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content["deposit_id"] assert deposit_id != deposit.id new_deposit = Deposit.objects.get(pk=deposit_id) assert deposit.collection == new_deposit.collection assert deposit.external_id == new_deposit.external_id assert new_deposit != 
deposit assert new_deposit.parent == deposit diff --git a/swh/deposit/tests/api/test_collection_post_binary.py b/swh/deposit/tests/api/test_collection_post_binary.py index c11cfcad..009615ae 100644 --- a/swh/deposit/tests/api/test_collection_post_binary.py +++ b/swh/deposit/tests/api/test_collection_post_binary.py @@ -1,577 +1,578 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Tests the handling of the binary content when doing a POST Col-IRI.""" from io import BytesIO from django.core.files.uploadedfile import InMemoryUploadedFile from django.urls import reverse import pytest from rest_framework import status from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED, EM_IRI from swh.deposit.models import Deposit, DepositRequest from swh.deposit.parsers import parse_xml from swh.deposit.tests.common import check_archive, create_arborescence_archive def test_post_deposit_binary_no_slug( authenticated_client, deposit_collection, sample_archive ): """Posting a binary deposit without slug header should return 400 """ url = reverse(COL_IRI, args=[deposit_collection.name]) # when response = authenticated_client.post( url, content_type="application/zip", # as zip data=sample_archive["data"], # + headers CONTENT_LENGTH=sample_archive["length"], HTTP_CONTENT_MD5=sample_archive["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="false", HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", ) assert b"Missing SLUG header" in response.content assert response.status_code == status.HTTP_400_BAD_REQUEST def test_post_deposit_binary_support( authenticated_client, deposit_collection, sample_archive ): """Binary upload with content-type not in [zip,x-tar] should return 415 """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) external_id = "some-external-id-1" # when response = authenticated_client.post( url, content_type="application/octet-stream", data=sample_archive["data"], # + headers CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=sample_archive["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="false", HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", ) # then assert response.status_code == status.HTTP_415_UNSUPPORTED_MEDIA_TYPE with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_binary_upload_ok( authenticated_client, deposit_collection, sample_archive ): """Binary upload with correct headers should return 201 with receipt """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) external_id = "some-external-id-1" # when response = authenticated_client.post( url, content_type="application/zip", # as zip data=sample_archive["data"], # + headers CONTENT_LENGTH=sample_archive["length"], # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, HTTP_CONTENT_MD5=sample_archive["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="false", HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (sample_archive["name"],), ) # then response_content = parse_xml(BytesIO(response.content)) assert response.status_code == status.HTTP_201_CREATED deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert 
deposit.status == DEPOSIT_STATUS_DEPOSITED assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swhid is None deposit_request = DepositRequest.objects.get(deposit=deposit) check_archive(sample_archive["name"], deposit_request.archive.name) assert deposit_request.metadata is None assert deposit_request.raw_metadata is None response_content = parse_xml(BytesIO(response.content)) assert response_content["deposit_archive"] == sample_archive["name"] assert int(response_content["deposit_id"]) == deposit.id assert response_content["deposit_status"] == deposit.status - edit_se_iri = reverse("edit_se_iri", args=[deposit_collection.name, deposit.id]) + edit_iri = reverse("edit_iri", args=[deposit_collection.name, deposit.id]) assert response._headers["location"] == ( "Location", - "http://testserver" + edit_se_iri, + "http://testserver" + edit_iri, ) def test_post_deposit_binary_failure_unsupported_packaging_header( authenticated_client, deposit_collection, sample_archive ): """Bin deposit without supported content_disposition header returns 400 """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) external_id = "some-external-id" # when response = authenticated_client.post( url, content_type="application/zip", data=sample_archive["data"], # + headers CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=sample_archive["md5sum"], HTTP_PACKAGING="something-unsupported", HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", ) # then assert response.status_code == status.HTTP_400_BAD_REQUEST assert ( b"The packaging provided something-unsupported is not supported" in response.content ) with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_binary_upload_no_content_disposition_header( authenticated_client, deposit_collection, sample_archive ): """Binary upload without content_disposition header should return 400 """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) external_id = "some-external-id" # when response = authenticated_client.post( url, content_type="application/zip", data=sample_archive["data"], # + headers CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=sample_archive["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="false", ) # then assert response.status_code == status.HTTP_400_BAD_REQUEST assert b"CONTENT_DISPOSITION header is mandatory" in response.content with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_mediation_not_supported( authenticated_client, deposit_collection, sample_archive ): """Binary upload with mediation should return a 412 response """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) external_id = "some-external-id-1" # when response = authenticated_client.post( url, content_type="application/zip", data=sample_archive["data"], # + headers CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=sample_archive["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="false", HTTP_ON_BEHALF_OF="someone", HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", ) # then assert response.status_code == status.HTTP_412_PRECONDITION_FAILED with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_binary_upload_fail_if_upload_size_limit_exceeded( 
authenticated_client, deposit_collection, sample_archive, tmp_path ): """Binary upload must not exceed the limit set up... """ tmp_path = str(tmp_path) url = reverse(COL_IRI, args=[deposit_collection.name]) archive = create_arborescence_archive( tmp_path, "archive2", "file2", b"some content in file", up_to_size=500 ) external_id = "some-external-id" # when response = authenticated_client.post( url, content_type="application/zip", data=archive["data"], # + headers CONTENT_LENGTH=archive["length"], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="false", HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", ) # then assert response.status_code == status.HTTP_413_REQUEST_ENTITY_TOO_LARGE assert b"Upload size limit exceeded" in response.content with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_2_post_2_different_deposits( authenticated_client, deposit_collection, sample_archive ): """Posting 2 deposits should return 2 different 201 responses with receipts """ url = reverse(COL_IRI, args=[deposit_collection.name]) # when response = authenticated_client.post( url, content_type="application/zip", # as zip data=sample_archive["data"], # + headers CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG="some-external-id-1", HTTP_CONTENT_MD5=sample_archive["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="false", HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) deposits = Deposit.objects.all() assert len(deposits) == 1 assert deposits[0] == deposit # second post response = authenticated_client.post( url, content_type="application/x-tar", # as tar data=sample_archive["data"], # + headers CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG="another-external-id", HTTP_CONTENT_MD5=sample_archive["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="false", HTTP_CONTENT_DISPOSITION="attachment; filename=filename1", ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id2 = response_content["deposit_id"] deposit2 = Deposit.objects.get(pk=deposit_id2) assert deposit != deposit2 deposits = Deposit.objects.all().order_by("id") assert len(deposits) == 2 assert list(deposits) == [deposit, deposit2] def test_post_deposit_binary_and_post_to_add_another_archive( authenticated_client, deposit_collection, sample_archive, tmp_path ): """Updating a deposit should return a 201 with receipt """ tmp_path = str(tmp_path) url = reverse(COL_IRI, args=[deposit_collection.name]) external_id = "some-external-id-1" # when response = authenticated_client.post( url, content_type="application/zip", # as zip data=sample_archive["data"], # + headers CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=sample_archive["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="true", HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (sample_archive["name"],), ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert
deposit.status == "partial" assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swhid is None deposit_request = DepositRequest.objects.get(deposit=deposit) assert deposit_request.deposit == deposit assert deposit_request.type == "archive" check_archive(sample_archive["name"], deposit_request.archive.name) # 2nd archive to upload archive2 = create_arborescence_archive( tmp_path, "archive2", "file2", b"some other content in file" ) # uri to update the content update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit_id]) # adding another archive for the deposit and finalizing it response = authenticated_client.post( update_uri, content_type="application/zip", # as zip data=archive2["data"], # + headers CONTENT_LENGTH=archive2["length"], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive2["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"]), ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_DEPOSITED assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swhid is None deposit_requests = list( DepositRequest.objects.filter(deposit=deposit).order_by("id") ) # 2 deposit requests for the same deposit assert len(deposit_requests) == 2 assert deposit_requests[0].deposit == deposit assert deposit_requests[0].type == "archive" check_archive(sample_archive["name"], deposit_requests[0].archive.name) assert deposit_requests[1].deposit == deposit assert deposit_requests[1].type == "archive" check_archive(archive2["name"], deposit_requests[1].archive.name) # only 1 deposit in db deposits = Deposit.objects.all() assert len(deposits) == 1 def test_post_deposit_then_update_refused( authenticated_client, deposit_collection, sample_archive, atom_dataset, tmp_path ): """Updating a deposit with status 'ready' should return a 400 """ tmp_path = str(tmp_path) url = reverse(COL_IRI, args=[deposit_collection.name]) external_id = "some-external-id-1" # when response = authenticated_client.post( url, content_type="application/zip", # as zip data=sample_archive["data"], # + headers CONTENT_LENGTH=sample_archive["length"], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=sample_archive["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="false", HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content["deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_DEPOSITED assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swhid is None deposit_request = DepositRequest.objects.get(deposit=deposit) assert deposit_request.deposit == deposit check_archive("filename0", deposit_request.archive.name) # updating/adding is forbidden # uri to update the content - edit_se_iri = reverse("edit_se_iri", args=[deposit_collection.name, deposit_id]) + edit_iri = reverse("edit_iri", args=[deposit_collection.name, deposit_id]) + se_iri = reverse("se_iri", args=[deposit_collection.name, deposit_id]) em_iri = reverse("em_iri", args=[deposit_collection.name, deposit_id]) # Testing all update/add endpoint should fail # since the 
status is ready archive2 = create_arborescence_archive( tmp_path, "archive2", "file2", b"some content in file 2" ) # replacing file is no longer possible since the deposit's # status is ready r = authenticated_client.put( em_iri, content_type="application/zip", data=archive2["data"], CONTENT_LENGTH=archive2["length"], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive2["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="false", HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", ) assert r.status_code == status.HTTP_400_BAD_REQUEST assert b"You can only act on deposit with status 'partial'" in r.content # adding file is no longer possible since the deposit's status # is ready r = authenticated_client.post( em_iri, content_type="application/zip", data=archive2["data"], CONTENT_LENGTH=archive2["length"], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive2["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="false", HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", ) assert r.status_code == status.HTTP_400_BAD_REQUEST assert b"You can only act on deposit with status 'partial'" in r.content # replacing metadata is no longer possible since the deposit's # status is ready r = authenticated_client.put( - edit_se_iri, + edit_iri, content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data-deposit-binary"], CONTENT_LENGTH=len(atom_dataset["entry-data-deposit-binary"]), HTTP_SLUG=external_id, ) assert r.status_code == status.HTTP_400_BAD_REQUEST assert b"You can only act on deposit with status 'partial'" in r.content # adding new metadata is no longer possible since the # deposit's status is ready r = authenticated_client.post( - edit_se_iri, + se_iri, content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data-deposit-binary"], CONTENT_LENGTH=len(atom_dataset["entry-data-deposit-binary"]), HTTP_SLUG=external_id, ) assert r.status_code == status.HTTP_400_BAD_REQUEST assert b"You can only act on deposit with status 'partial'" in r.content archive_content = b"some content representing archive" archive = InMemoryUploadedFile( BytesIO(archive_content), field_name="archive0", name="archive0", content_type="application/zip", size=len(archive_content), charset=None, ) atom_entry = InMemoryUploadedFile( BytesIO(atom_dataset["entry-data-deposit-binary"].encode("utf-8")), field_name="atom0", name="atom0", content_type='application/atom+xml; charset="utf-8"', size=len(atom_dataset["entry-data-deposit-binary"]), charset="utf-8", ) # replacing multipart metadata is no longer possible since the # deposit's status is ready r = authenticated_client.put( - edit_se_iri, + edit_iri, format="multipart", data={"archive": archive, "atom_entry": atom_entry,}, ) assert r.status_code == status.HTTP_400_BAD_REQUEST assert b"You can only act on deposit with status 'partial'" in r.content # adding new metadata is no longer possible since the # deposit's status is ready r = authenticated_client.post( - edit_se_iri, + se_iri, format="multipart", data={"archive": archive, "atom_entry": atom_entry,}, ) assert r.status_code == status.HTTP_400_BAD_REQUEST assert b"You can only act on deposit with status 'partial'" in r.content diff --git a/swh/deposit/tests/api/test_delete.py b/swh/deposit/tests/api/test_delete.py index 23a83b8c..1a1dcf76 100644 --- a/swh/deposit/tests/api/test_delete.py +++ b/swh/deposit/tests/api/test_delete.py @@ -1,131 +1,131 @@ # Copyright (C) 2017-2019 The Software Heritage developers # 
See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import defaultdict from typing import Dict, Mapping from django.urls import reverse from rest_framework import status from swh.deposit.config import ( ARCHIVE_KEY, DEPOSIT_STATUS_DEPOSITED, - EDIT_SE_IRI, + EDIT_IRI, EM_IRI, METADATA_KEY, ) from swh.deposit.models import Deposit, DepositRequest def count_deposit_request_types(deposit_requests) -> Mapping[str, int]: deposit_request_types = defaultdict(int) # type: Dict[str, int] for dr in deposit_requests: deposit_request_types[dr.type] += 1 return deposit_request_types def test_delete_archive_on_partial_deposit_works( authenticated_client, partial_deposit_with_metadata, deposit_collection ): """Removing partial deposit's archive should return a 204 response """ deposit_id = partial_deposit_with_metadata.id deposit = Deposit.objects.get(pk=deposit_id) deposit_requests = DepositRequest.objects.filter(deposit=deposit) # deposit request type: 'archive', 1 'metadata' deposit_request_types = count_deposit_request_types(deposit_requests) assert deposit_request_types == {ARCHIVE_KEY: 1, METADATA_KEY: 1} # when update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit_id]) response = authenticated_client.delete(update_uri) # then assert response.status_code == status.HTTP_204_NO_CONTENT deposit = Deposit.objects.get(pk=deposit_id) deposit_requests2 = DepositRequest.objects.filter(deposit=deposit) deposit_request_types = count_deposit_request_types(deposit_requests2) assert deposit_request_types == {METADATA_KEY: 1} def test_delete_archive_on_undefined_deposit_fails( authenticated_client, deposit_collection, sample_archive ): """Delete undefined deposit returns a 404 response """ # when update_uri = reverse(EM_IRI, args=[deposit_collection.name, 999]) response = authenticated_client.delete(update_uri) # then assert response.status_code == status.HTTP_404_NOT_FOUND def test_delete_non_partial_deposit( authenticated_client, deposit_collection, deposited_deposit ): """Delete !partial status deposit should return a 400 response """ deposit = deposited_deposit assert deposit.status == DEPOSIT_STATUS_DEPOSITED # when update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.delete(update_uri) # then assert response.status_code == status.HTTP_400_BAD_REQUEST assert ( b"You can only act on deposit with status 'partial'" in response.content ) deposit = Deposit.objects.get(pk=deposit.id) assert deposit is not None def test_delete_partial_deposit( authenticated_client, deposit_collection, partial_deposit ): """Delete deposit should return a 204 response """ # given deposit = partial_deposit # when - url = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) + url = reverse(EDIT_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.delete(url) # then assert response.status_code == status.HTTP_204_NO_CONTENT deposit_requests = list(DepositRequest.objects.filter(deposit=deposit)) assert deposit_requests == [] deposits = list(Deposit.objects.filter(pk=deposit.id)) assert deposits == [] -def test_delete_on_edit_se_iri_cannot_delete_non_partial_deposit( +def test_delete_on_edit_iri_cannot_delete_non_partial_deposit( authenticated_client, deposit_collection, complete_deposit ): """Delete !partial deposit should return a 400 response """ # given deposit = complete_deposit 
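# Editorial sketch (illustrative, not part of the original patch): this change splits
# the former combined "edit_se_iri" route into two SWORD endpoints: EDIT_IRI
# ("edit_iri", metadata replacement and deposit deletion via PUT/DELETE) and SE_IRI
# ("se_iri", appending new archives or metadata via POST). A minimal, hedged sketch of
# resolving both routes for this deposit with Django's reverse(); the variable names
# edit_iri_url and se_iri_url are illustrative, and the concrete paths depend on the
# project's URL configuration.
from swh.deposit.config import SE_IRI  # EDIT_IRI is already imported in this module

edit_iri_url = reverse(EDIT_IRI, args=[deposit_collection.name, deposit.id])  # PUT/DELETE
se_iri_url = reverse(SE_IRI, args=[deposit_collection.name, deposit.id])  # POST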
# when - url = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) + url = reverse(EDIT_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.delete(url) # then assert response.status_code == status.HTTP_400_BAD_REQUEST assert ( b"You can only act on deposit with status 'partial'" in response.content ) deposit = Deposit.objects.get(pk=deposit.id) assert deposit is not None diff --git a/swh/deposit/tests/api/test_deposit_private_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py index fad948b7..a10961eb 100644 --- a/swh/deposit/tests/api/test_deposit_private_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py @@ -1,398 +1,398 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import reverse from rest_framework import status from swh.deposit import __version__, utils -from swh.deposit.config import EDIT_SE_IRI, PRIVATE_GET_DEPOSIT_METADATA, SWH_PERSON +from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA, SE_IRI, SWH_PERSON from swh.deposit.models import Deposit from swh.deposit.parsers import parse_xml PRIVATE_GET_DEPOSIT_METADATA_NC = PRIVATE_GET_DEPOSIT_METADATA + "-nc" def private_get_raw_url_endpoints(collection, deposit): """There are 2 endpoints to check (one with collection, one without)""" deposit_id = deposit if isinstance(deposit, int) else deposit.id return [ reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[collection.name, deposit_id]), reverse(PRIVATE_GET_DEPOSIT_METADATA_NC, args=[deposit_id]), ] def update_deposit_with_metadata(authenticated_client, collection, deposit, metadata): # update deposit's metadata response = authenticated_client.post( - reverse(EDIT_SE_IRI, args=[collection.name, deposit.id]), + reverse(SE_IRI, args=[collection.name, deposit.id]), content_type="application/atom+xml;type=entry", data=metadata, HTTP_SLUG=deposit.external_id, HTTP_IN_PROGRESS=True, ) assert response.status_code == status.HTTP_201_CREATED return deposit def test_read_metadata( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """Private metadata read api to existing deposit should return metadata """ deposit = partial_deposit deposit.external_id = "some-external-id" deposit.save() metadata_xml_atoms = [ atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"] ] metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] for atom_xml in metadata_xml_atoms: deposit = update_deposit_with_metadata( authenticated_client, deposit_collection, deposit, atom_xml, ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK assert response._headers["content-type"][1] == "application/json" actual_data = response.json() assert actual_data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, "metadata_raw": metadata_xml_atoms, "metadata_dict": utils.merge(*metadata_xml_raws), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", }, "tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, "deposit": { "author": SWH_PERSON, "committer": SWH_PERSON, 
"committer_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, "author_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, "client": "test", "id": deposit.id, "collection": "test", "revision_parents": [], }, } def test_read_metadata_revision_with_parent( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """Private read metadata to a deposit (with parent) returns metadata """ deposit = partial_deposit deposit.external_id = "some-external-id" deposit.save() metadata_xml_atoms = [ atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"] ] metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] for atom_xml in metadata_xml_atoms: deposit = update_deposit_with_metadata( authenticated_client, deposit_collection, deposit, atom_xml, ) rev_id = "da78a9d4cf1d5d29873693fd496142e3a18c20fa" swhid = "swh:1:rev:%s" % rev_id fake_parent = Deposit( swhid=swhid, client=deposit.client, collection=deposit.collection ) fake_parent.save() deposit.parent = fake_parent deposit.save() for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK assert response._headers["content-type"][1] == "application/json" actual_data = response.json() assert actual_data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, "metadata_raw": metadata_xml_atoms, "metadata_dict": utils.merge(*metadata_xml_raws), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", }, "tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, "deposit": { "author": SWH_PERSON, "committer": SWH_PERSON, "committer_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, "author_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, "client": "test", "id": deposit.id, "collection": "test", "revision_parents": [rev_id], }, } def test_read_metadata_3( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """date(Created|Published) provided, uses author/committer date """ deposit = partial_deposit deposit.external_id = "hal-01243065" deposit.save() # add metadata to the deposit with datePublished and dateCreated codemeta_entry_data = ( atom_dataset["metadata"] % """ 2015-04-06T17:08:47+02:00 2017-05-03T16:08:47+02:00 """ ) metadata_xml_atoms = [ atom_dataset["entry-data2"], atom_dataset["entry-data3"], codemeta_entry_data, ] metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] for atom_xml in metadata_xml_atoms: update_deposit_with_metadata( authenticated_client, deposit_collection, deposit, atom_xml, ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK assert response._headers["content-type"][1] == "application/json" actual_data = response.json() assert actual_data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/hal-01243065", }, "metadata_raw": metadata_xml_atoms, "metadata_dict": utils.merge(*metadata_xml_raws), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": 
"https://hal-test.archives-ouvertes.fr/", }, "tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, "deposit": { "author": SWH_PERSON, "committer": SWH_PERSON, "committer_date": { "negative_utc": False, "offset": 120, "timestamp": {"microseconds": 0, "seconds": 1493820527}, }, "author_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, "client": deposit_collection.name, "id": deposit.id, "collection": deposit_collection.name, "revision_parents": [], }, } def test_read_metadata_4( authenticated_client, deposit_collection, atom_dataset, partial_deposit ): """dateCreated/datePublished not provided, revision uses complete_date """ deposit = partial_deposit codemeta_entry_data = atom_dataset["metadata"] % "" deposit = update_deposit_with_metadata( authenticated_client, deposit_collection, deposit, codemeta_entry_data ) # will use the deposit completed date as fallback date deposit.complete_date = "2016-04-06" deposit.save() for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK assert response._headers["content-type"][1] == "application/json" actual_data = response.json() assert actual_data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/external-id-partial", }, "metadata_raw": [codemeta_entry_data], "metadata_dict": parse_xml(codemeta_entry_data), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", }, "tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, "deposit": { "author": SWH_PERSON, "committer": SWH_PERSON, "committer_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1459900800}, }, "author_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1459900800}, }, "client": deposit_collection.name, "id": deposit.id, "collection": deposit_collection.name, "revision_parents": [], }, } def test_read_metadata_5( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """dateCreated/datePublished provided, revision uses author/committer date If multiple dateCreated provided, the first occurrence (of dateCreated) is selected. If multiple datePublished provided, the first occurrence (of datePublished) is selected. 
""" deposit = partial_deposit # add metadata to the deposit with multiple datePublished/dateCreated codemeta_entry_data = ( atom_dataset["metadata"] % """ 2015-04-06T17:08:47+02:00 2017-05-03T16:08:47+02:00 2016-04-06T17:08:47+02:00 2018-05-03T16:08:47+02:00 """ ) deposit = update_deposit_with_metadata( authenticated_client, deposit_collection, deposit, codemeta_entry_data ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK assert response._headers["content-type"][1] == "application/json" actual_data = response.json() assert actual_data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/external-id-partial", }, "metadata_raw": [codemeta_entry_data], "metadata_dict": parse_xml(codemeta_entry_data), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", }, "tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, "deposit": { "author": SWH_PERSON, "committer": SWH_PERSON, "committer_date": { "negative_utc": False, "offset": 120, "timestamp": {"microseconds": 0, "seconds": 1493820527}, }, "author_date": { "negative_utc": False, "offset": 120, "timestamp": {"microseconds": 0, "seconds": 1428332927}, }, "client": deposit_collection.name, "id": deposit.id, "collection": deposit_collection.name, "revision_parents": [], }, } def test_access_to_nonexisting_deposit_returns_404_response( authenticated_client, deposit_collection, ): """Read unknown collection should return a 404 response """ unknown_id = 999 try: Deposit.objects.get(pk=unknown_id) except Deposit.DoesNotExist: assert True for url in private_get_raw_url_endpoints(deposit_collection, unknown_id): response = authenticated_client.get(url) assert response.status_code == status.HTTP_404_NOT_FOUND msg = "Deposit with id %s does not exist" % unknown_id assert msg in response.content.decode("utf-8") diff --git a/swh/deposit/tests/api/test_deposit_update.py b/swh/deposit/tests/api/test_deposit_update.py index 6be8da7e..360b7d06 100644 --- a/swh/deposit/tests/api/test_deposit_update.py +++ b/swh/deposit/tests/api/test_deposit_update.py @@ -1,805 +1,796 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from io import BytesIO import attr from django.core.files.uploadedfile import InMemoryUploadedFile from django.urls import reverse from rest_framework import status from swh.deposit.api.common import ACCEPT_ARCHIVE_CONTENT_TYPES from swh.deposit.config import ( DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, - EDIT_SE_IRI, + EDIT_IRI, EM_IRI, + SE_IRI, APIConfig, ) from swh.deposit.models import Deposit, DepositCollection, DepositRequest from swh.deposit.parsers import parse_xml from swh.deposit.tests.common import check_archive, create_arborescence_archive from swh.model.hashutil import hash_to_bytes from swh.model.identifiers import parse_swhid, swhid from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, MetadataTargetType, RawExtrinsicMetadata, ) from swh.storage.interface import PagedResult def test_replace_archive_to_deposit_is_possible( tmp_path, partial_deposit, deposit_collection, authenticated_client, sample_archive, atom_dataset, ): 
"""Replace all archive with another one should return a 204 response """ tmp_path = str(tmp_path) # given deposit = partial_deposit requests = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(list(requests)) == 1 check_archive(sample_archive["name"], requests[0].archive.name) # we have no metadata for that deposit requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) assert len(requests) == 0 response = authenticated_client.post( - reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]), + reverse(SE_IRI, args=[deposit_collection.name, deposit.id]), content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data1"], HTTP_SLUG=deposit.external_id, HTTP_IN_PROGRESS=True, ) requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) assert len(requests) == 1 update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) external_id = "some-external-id-1" archive2 = create_arborescence_archive( tmp_path, "archive2", "file2", b"some other content in file" ) response = authenticated_client.put( update_uri, content_type="application/zip", # as zip data=archive2["data"], # + headers CONTENT_LENGTH=archive2["length"], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive2["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="false", HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],), ) assert response.status_code == status.HTTP_204_NO_CONTENT requests = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(list(requests)) == 1 check_archive(archive2["name"], requests[0].archive.name) # check we did not touch the other parts requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) assert len(requests) == 1 def test_replace_metadata_to_deposit_is_possible( tmp_path, authenticated_client, partial_deposit_with_metadata, deposit_collection, atom_dataset, ): """Replace all metadata with another one should return a 204 response """ # given deposit = partial_deposit_with_metadata raw_metadata0 = atom_dataset["entry-data0"] % deposit.external_id.encode("utf-8") requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests_meta) == 1 request_meta0 = requests_meta[0] assert request_meta0.raw_metadata == raw_metadata0 requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive0) == 1 - update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) + update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.put( update_uri, content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data1"], ) assert response.status_code == status.HTTP_204_NO_CONTENT requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests_meta) == 1 request_meta1 = requests_meta[0] raw_metadata1 = request_meta1.raw_metadata assert raw_metadata1 == atom_dataset["entry-data1"] assert raw_metadata0 != raw_metadata1 assert request_meta0 != request_meta1 # check we did not touch the other parts requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive1) == 1 assert set(requests_archive0) == set(requests_archive1) def test_add_archive_to_deposit_is_possible( tmp_path, authenticated_client, deposit_collection, partial_deposit_with_metadata, sample_archive, ): """Add another archive to a deposit 
should return a 201 response """ tmp_path = str(tmp_path) deposit = partial_deposit_with_metadata requests = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests) == 1 check_archive(sample_archive["name"], requests[0].archive.name) requests_meta0 = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests_meta0) == 1 update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) external_id = "some-external-id-1" archive2 = create_arborescence_archive( tmp_path, "archive2", "file2", b"some other content in file" ) response = authenticated_client.post( update_uri, content_type="application/zip", # as zip data=archive2["data"], # + headers CONTENT_LENGTH=archive2["length"], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=archive2["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="false", HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],), ) assert response.status_code == status.HTTP_201_CREATED requests = DepositRequest.objects.filter(deposit=deposit, type="archive").order_by( "id" ) assert len(requests) == 2 # first archive still exists check_archive(sample_archive["name"], requests[0].archive.name) # a new one was added check_archive(archive2["name"], requests[1].archive.name) # check we did not touch the other parts requests_meta1 = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests_meta1) == 1 assert set(requests_meta0) == set(requests_meta1) def test_add_metadata_to_deposit_is_possible( authenticated_client, deposit_collection, partial_deposit_with_metadata, atom_dataset, ): """Adding new metadata to a deposit should return a 201 response """ deposit = partial_deposit_with_metadata requests = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests) == 1 requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive0) == 1 - update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) + update_uri = reverse(SE_IRI, args=[deposit_collection.name, deposit.id]) atom_entry = atom_dataset["entry-data1"] response = authenticated_client.post( update_uri, content_type="application/atom+xml;type=entry", data=atom_entry ) assert response.status_code == status.HTTP_201_CREATED requests = DepositRequest.objects.filter(deposit=deposit, type="metadata").order_by( "id" ) assert len(requests) == 2 expected_raw_meta0 = atom_dataset["entry-data0"] % ( deposit.external_id.encode("utf-8") ) # a new one was added assert requests[0].raw_metadata == expected_raw_meta0 assert requests[1].raw_metadata == atom_entry # check we did not touch the other parts requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive1) == 1 assert set(requests_archive0) == set(requests_archive1) def test_add_both_archive_and_metadata_to_deposit( authenticated_client, deposit_collection, partial_deposit_with_metadata, atom_dataset, sample_archive, ): """Scenario: Adding both a new archive and new metadata to a partial deposit is ok Response: 201 """ deposit = partial_deposit_with_metadata requests = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests) == 1 requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive0) == 1 - update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) + update_uri = reverse(EDIT_IRI, args=[deposit_collection.name,
deposit.id]) archive = InMemoryUploadedFile( BytesIO(sample_archive["data"]), field_name=sample_archive["name"], name=sample_archive["name"], content_type="application/x-tar", size=sample_archive["length"], charset=None, ) data_atom_entry = atom_dataset["entry-data1"] atom_entry = InMemoryUploadedFile( BytesIO(data_atom_entry.encode("utf-8")), field_name="atom0", name="atom0", content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry), charset="utf-8", ) - update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) + update_uri = reverse(SE_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.post( update_uri, format="multipart", data={"archive": archive, "atom_entry": atom_entry,}, ) assert response.status_code == status.HTTP_201_CREATED requests = DepositRequest.objects.filter(deposit=deposit, type="metadata").order_by( "id" ) assert len(requests) == 1 + 1, "New deposit request archive got added" expected_raw_meta0 = atom_dataset["entry-data0"] % ( deposit.external_id.encode("utf-8") ) # a new one was added assert requests[0].raw_metadata == expected_raw_meta0 assert requests[1].raw_metadata == data_atom_entry # check we did not touch the other parts requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive1) == 1 + 1, "New deposit request metadata got added" def test_post_metadata_empty_post_finalize_deposit_ok( authenticated_client, deposit_collection, partial_deposit_with_metadata, atom_dataset, ): """Empty atom post entry with header in-progress to false transitions deposit to 'deposited' status Response: 200 """ deposit = partial_deposit_with_metadata assert deposit.status == DEPOSIT_STATUS_PARTIAL - update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) + update_uri = reverse(SE_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.post( update_uri, content_type="application/atom+xml;type=entry", data="", size=0, HTTP_IN_PROGRESS=False, ) assert response.status_code == status.HTTP_200_OK deposit = Deposit.objects.get(pk=deposit.id) assert deposit.status == DEPOSIT_STATUS_DEPOSITED def test_add_metadata_to_unknown_deposit( deposit_collection, authenticated_client, atom_dataset ): """Replacing metadata to unknown deposit should return a 404 response """ unknown_deposit_id = 1000 try: Deposit.objects.get(pk=unknown_deposit_id) except Deposit.DoesNotExist: assert True - url = reverse(EDIT_SE_IRI, args=[deposit_collection, unknown_deposit_id]) + url = reverse(SE_IRI, args=[deposit_collection, unknown_deposit_id]) response = authenticated_client.post( url, content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data1"], ) assert response.status_code == status.HTTP_404_NOT_FOUND response_content = parse_xml(response.content) assert "Unknown collection name" in response_content["sword:error"]["summary"] def test_add_metadata_to_unknown_collection( partial_deposit, authenticated_client, atom_dataset ): """Replacing metadata to unknown deposit should return a 404 response """ deposit = partial_deposit unknown_collection_name = "unknown-collection" try: DepositCollection.objects.get(name=unknown_collection_name) except DepositCollection.DoesNotExist: assert True - url = reverse(EDIT_SE_IRI, args=[unknown_collection_name, deposit.id]) + url = reverse(SE_IRI, args=[unknown_collection_name, deposit.id]) response = authenticated_client.post( url, content_type="application/atom+xml;type=entry", 
data=atom_dataset["entry-data1"], ) assert response.status_code == status.HTTP_404_NOT_FOUND response_content = parse_xml(response.content) assert "Unknown collection name" in response_content["sword:error"]["summary"] def test_replace_metadata_to_unknown_deposit( authenticated_client, deposit_collection, atom_dataset ): """Adding metadata to unknown deposit should return a 404 response """ unknown_deposit_id = 998 try: Deposit.objects.get(pk=unknown_deposit_id) except Deposit.DoesNotExist: assert True - url = reverse(EDIT_SE_IRI, args=[deposit_collection.name, unknown_deposit_id]) + url = reverse(EDIT_IRI, args=[deposit_collection.name, unknown_deposit_id]) response = authenticated_client.put( url, content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data1"], ) assert response.status_code == status.HTTP_404_NOT_FOUND response_content = parse_xml(response.content) assert ( "Deposit with id %s does not exist" % unknown_deposit_id == response_content["sword:error"]["summary"] ) def test_add_archive_to_unknown_deposit( authenticated_client, deposit_collection, atom_dataset ): """Adding metadata to unknown deposit should return a 404 response """ unknown_deposit_id = 997 try: Deposit.objects.get(pk=unknown_deposit_id) except Deposit.DoesNotExist: assert True url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id]) response = authenticated_client.post( url, content_type="application/zip", data=atom_dataset["entry-data1"] ) assert response.status_code == status.HTTP_404_NOT_FOUND response_content = parse_xml(response.content) assert ( "Deposit with id %s does not exist" % unknown_deposit_id == response_content["sword:error"]["summary"] ) def test_replace_archive_to_unknown_deposit( authenticated_client, deposit_collection, atom_dataset ): """Replacing archive to unknown deposit should return a 404 response """ unknown_deposit_id = 996 try: Deposit.objects.get(pk=unknown_deposit_id) except Deposit.DoesNotExist: assert True url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id]) response = authenticated_client.put( url, content_type="application/zip", data=atom_dataset["entry-data1"] ) assert response.status_code == status.HTTP_404_NOT_FOUND response_content = parse_xml(response.content) assert ( "Deposit with id %s does not exist" % unknown_deposit_id == response_content["sword:error"]["summary"] ) def test_post_metadata_to_em_iri_failure( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """Update (POST) archive with wrong content type should return 400 """ deposit = partial_deposit update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.post( update_uri, content_type="application/x-gtar-compressed", data=atom_dataset["entry-data1"], ) assert response.status_code == status.HTTP_400_BAD_REQUEST assert b"Packaging format supported is restricted" in response.content for supported_format in ACCEPT_ARCHIVE_CONTENT_TYPES: assert supported_format.encode() in response.content def test_put_metadata_to_em_iri_failure( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """Update (PUT) archive with wrong content type should return 400 """ # given deposit = partial_deposit # when update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.put( update_uri, content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data1"], ) # then assert response.status_code == status.HTTP_400_BAD_REQUEST assert 
b"Packaging format supported is restricted" in response.content for supported_format in ACCEPT_ARCHIVE_CONTENT_TYPES: assert supported_format.encode() in response.content def test_put_update_metadata_and_archive_deposit_partial_nominal( tmp_path, authenticated_client, partial_deposit_with_metadata, deposit_collection, atom_dataset, sample_archive, ): """Scenario: Replace metadata and archive(s) with new ones should be ok Response: 204 """ # given deposit = partial_deposit_with_metadata raw_metadata0 = atom_dataset["entry-data0"] % deposit.external_id.encode("utf-8") requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests_meta) == 1 request_meta0 = requests_meta[0] assert request_meta0.raw_metadata == raw_metadata0 requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive0) == 1 archive = InMemoryUploadedFile( BytesIO(sample_archive["data"]), field_name=sample_archive["name"], name=sample_archive["name"], content_type="application/x-tar", size=sample_archive["length"], charset=None, ) data_atom_entry = atom_dataset["entry-data1"] atom_entry = InMemoryUploadedFile( BytesIO(data_atom_entry.encode("utf-8")), field_name="atom0", name="atom0", content_type='application/atom+xml; charset="utf-8"', size=len(data_atom_entry), charset="utf-8", ) - update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) + update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.put( update_uri, format="multipart", data={"archive": archive, "atom_entry": atom_entry,}, ) assert response.status_code == status.HTTP_204_NO_CONTENT # check we updated the metadata part requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata") assert len(requests_meta) == 1 request_meta1 = requests_meta[0] raw_metadata1 = request_meta1.raw_metadata assert raw_metadata1 == data_atom_entry assert raw_metadata0 != raw_metadata1 assert request_meta0 != request_meta1 # and the archive part requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive1) == 1 assert set(requests_archive0) != set(requests_archive1) def test_put_update_metadata_done_deposit_nominal( tmp_path, authenticated_client, complete_deposit, deposit_collection, atom_dataset, sample_data, swh_storage, ): """Nominal scenario, client send an update of metadata on a deposit with status "done" with an existing swhid. Such swhid has its metadata updated accordingly both in the deposit backend and in the metadata storage. 
Response: 204 """ deposit_swhid = parse_swhid(complete_deposit.swhid) assert deposit_swhid.object_type == "directory" directory_id = hash_to_bytes(deposit_swhid.object_id) # directory targeted by the complete_deposit does not exist in the storage assert list(swh_storage.directory_missing([directory_id])) == [directory_id] # so let's create a directory reference in the storage (current deposit targets an # unknown swhid) existing_directory = sample_data.directory swh_storage.directory_add([existing_directory]) assert list(swh_storage.directory_missing([existing_directory.id])) == [] # and patch one complete deposit swhid so it targets said reference complete_deposit.swhid = swhid("directory", existing_directory.id) complete_deposit.save() actual_existing_requests_archive = DepositRequest.objects.filter( deposit=complete_deposit, type="archive" ) nb_archives = len(actual_existing_requests_archive) actual_existing_requests_metadata = DepositRequest.objects.filter( deposit=complete_deposit, type="metadata" ) nb_metadata = len(actual_existing_requests_metadata) - update_uri = reverse( - EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id] - ) + update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) response = authenticated_client.put( update_uri, content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data1"], HTTP_X_CHECK_SWHID=complete_deposit.swhid, ) assert response.status_code == status.HTTP_204_NO_CONTENT new_requests_meta = DepositRequest.objects.filter( deposit=complete_deposit, type="metadata" ) assert len(new_requests_meta) == nb_metadata + 1 request_meta1 = new_requests_meta[0] raw_metadata1 = request_meta1.raw_metadata assert raw_metadata1 == atom_dataset["entry-data1"] # check we did not touch the other parts requests_archive1 = DepositRequest.objects.filter( deposit=complete_deposit, type="archive" ) assert len(requests_archive1) == nb_archives assert set(actual_existing_requests_archive) == set(requests_archive1) # Ensure metadata stored in the metadata storage is consistent metadata_authority = MetadataAuthority( type=MetadataAuthorityType.DEPOSIT_CLIENT, url=complete_deposit.client.provider_url, metadata={"name": complete_deposit.client.last_name}, ) actual_authority = swh_storage.metadata_authority_get( MetadataAuthorityType.DEPOSIT_CLIENT, url=complete_deposit.client.provider_url ) assert actual_authority == metadata_authority config = APIConfig() metadata_fetcher = MetadataFetcher( name=config.tool["name"], version=config.tool["version"], metadata=config.tool["configuration"], ) actual_fetcher = swh_storage.metadata_fetcher_get( config.tool["name"], config.tool["version"] ) assert actual_fetcher == metadata_fetcher directory_swhid = parse_swhid(complete_deposit.swhid) page_results = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.DIRECTORY, directory_swhid, metadata_authority ) assert page_results == PagedResult( results=[ RawExtrinsicMetadata( type=MetadataTargetType.DIRECTORY, target=directory_swhid, discovery_date=request_meta1.date, authority=attr.evolve(metadata_authority, metadata=None), fetcher=attr.evolve(metadata_fetcher, metadata=None), format="sword-v2-atom-codemeta", metadata=raw_metadata1.encode(), origin=complete_deposit.origin_url, ) ], next_page_token=None, ) def test_put_update_metadata_done_deposit_failure_mismatched_swhid( tmp_path, authenticated_client, complete_deposit, deposit_collection, atom_dataset, swh_storage, ): """failure: client updates metadata on deposit with SWHID not 
matching the deposit's. Response: 400 """ incorrect_swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea" assert complete_deposit.swhid != incorrect_swhid - update_uri = reverse( - EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id] - ) + update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) response = authenticated_client.put( update_uri, content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data1"], HTTP_X_CHECK_SWHID=incorrect_swhid, ) assert response.status_code == status.HTTP_400_BAD_REQUEST assert b"Mismatched provided SWHID" in response.content def test_put_update_metadata_done_deposit_failure_malformed_xml( tmp_path, authenticated_client, complete_deposit, deposit_collection, atom_dataset, swh_storage, ): """failure: client updates metadata on deposit done with a malformed xml Response: 400 """ - update_uri = reverse( - EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id] - ) + update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) response = authenticated_client.put( update_uri, content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data-ko"], HTTP_X_CHECK_SWHID=complete_deposit.swhid, ) assert response.status_code == status.HTTP_400_BAD_REQUEST assert b"Malformed xml metadata" in response.content def test_put_update_metadata_done_deposit_failure_empty_xml( tmp_path, authenticated_client, complete_deposit, deposit_collection, atom_dataset, swh_storage, ): """failure: client updates metadata on deposit done with an empty xml. Response: 400 """ - update_uri = reverse( - EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id] - ) + update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) for atom_key in ["entry-data-empty-body", "entry-data-empty-body-no-namespace"]: atom_content = atom_dataset[atom_key] response = authenticated_client.put( update_uri, content_type="application/atom+xml;type=entry", data=atom_content, HTTP_X_CHECK_SWHID=complete_deposit.swhid, ) assert response.status_code == status.HTTP_400_BAD_REQUEST assert b"Empty body request is not supported" in response.content def test_put_update_metadata_done_deposit_failure_functional_checks( tmp_path, authenticated_client, complete_deposit, deposit_collection, atom_dataset, swh_storage, ): """failure: client updates metadata on deposit done without required incomplete metadata Response: 400 """ - update_uri = reverse( - EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id] - ) + update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) response = authenticated_client.put( update_uri, content_type="application/atom+xml;type=entry", # no title, nor author, nor name fields data=atom_dataset["entry-data-fail-metadata-functional-checks"], HTTP_X_CHECK_SWHID=complete_deposit.swhid, ) assert response.status_code == status.HTTP_400_BAD_REQUEST assert b"Functional metadata checks failure" in response.content # detail on the errors assert b"- Mandatory fields are missing (author)" in response.content assert ( b"- Mandatory alternate fields are missing (name or title)" in response.content ) diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py index cb2c92e3..783633bd 100644 --- a/swh/deposit/tests/conftest.py +++ b/swh/deposit/tests/conftest.py @@ -1,445 +1,445 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General 
Public License version 3, or any later version # See top-level LICENSE file for more information import base64 from functools import partial import os import re from typing import Mapping from django.test.utils import setup_databases # type: ignore from django.urls import reverse import psycopg2 from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT import pytest from rest_framework import status from rest_framework.test import APIClient import yaml from swh.core.config import read from swh.core.pytest_plugin import get_response_cb from swh.deposit.config import ( COL_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_VERIFIED, - EDIT_SE_IRI, + SE_IRI, setup_django_for, ) from swh.deposit.parsers import parse_xml from swh.deposit.tests.common import create_arborescence_archive from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, swhid from swh.scheduler import get_scheduler # mypy is asked to ignore the import statement above because setup_databases # is not part of the d.t.utils.__all__ variable. TEST_USER = { "username": "test", "password": "password", "email": "test@example.org", "provider_url": "https://hal-test.archives-ouvertes.fr/", "domain": "archives-ouvertes.fr/", "collection": {"name": "test"}, } def pytest_configure(): setup_django_for("testing") @pytest.fixture def requests_mock_datadir(datadir, requests_mock_datadir): """Override default behavior to deal with put/post methods """ cb = partial(get_response_cb, datadir=datadir) requests_mock_datadir.put(re.compile("https://"), body=cb) requests_mock_datadir.post(re.compile("https://"), body=cb) return requests_mock_datadir @pytest.fixture() def deposit_config(swh_scheduler_config, swh_storage_backend_config): return { "max_upload_size": 500, "extraction_dir": "/tmp/swh-deposit/test/extraction-dir", "checks": False, "scheduler": {"cls": "local", **swh_scheduler_config,}, "storage_metadata": swh_storage_backend_config, } @pytest.fixture() def deposit_config_path(tmp_path, monkeypatch, deposit_config): conf_path = os.path.join(tmp_path, "deposit.yml") with open(conf_path, "w") as f: f.write(yaml.dump(deposit_config)) monkeypatch.setenv("SWH_CONFIG_FILENAME", conf_path) return conf_path @pytest.fixture(autouse=True) def deposit_autoconfig(deposit_config_path): """Enforce config for deposit classes inherited from APIConfig.""" cfg = read(deposit_config_path) if "scheduler" in cfg: # scheduler setup: require the check-deposit and load-deposit tasks scheduler = get_scheduler(**cfg["scheduler"]) task_types = [ { "type": "check-deposit", "backend_name": "swh.deposit.loader.tasks.ChecksDepositTsk", "description": "Check deposit metadata/archive before loading", "num_retries": 3, }, { "type": "load-deposit", "backend_name": "swh.loader.package.deposit.tasks.LoadDeposit", "description": "Loading deposit archive into swh archive", "num_retries": 3, }, ] for task_type in task_types: scheduler.create_task_type(task_type) @pytest.fixture(scope="session") def django_db_setup(request, django_db_blocker, postgresql_proc): from django.conf import settings settings.DATABASES["default"].update( { ("ENGINE", "django.db.backends.postgresql"), ("NAME", "tests"), ("USER", postgresql_proc.user), # noqa ("HOST", postgresql_proc.host), # noqa ("PORT", postgresql_proc.port), # noqa } ) with django_db_blocker.unblock(): setup_databases( verbosity=request.config.option.verbose, interactive=False, keepdb=False ) def execute_sql(sql): """Execute 
    """Execute sql to postgres db"""
    with psycopg2.connect(database="postgres") as conn:
        conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
        cur = conn.cursor()
        cur.execute(sql)


@pytest.fixture(autouse=True, scope="session")
def swh_proxy():
    """Automatically inject this fixture in all tests to ensure no outside
    connection takes place.

    """
    os.environ["http_proxy"] = "http://localhost:999"
    os.environ["https_proxy"] = "http://localhost:999"


def create_deposit_collection(collection_name: str):
    """Create a deposit collection with name collection_name

    """
    from swh.deposit.models import DepositCollection

    try:
        collection = DepositCollection._default_manager.get(name=collection_name)
    except DepositCollection.DoesNotExist:
        collection = DepositCollection(name=collection_name)
        collection.save()
    return collection


def deposit_collection_factory(collection_name=TEST_USER["collection"]["name"]):
    @pytest.fixture
    def _deposit_collection(db, collection_name=collection_name):
        return create_deposit_collection(collection_name)

    return _deposit_collection


deposit_collection = deposit_collection_factory()
deposit_another_collection = deposit_collection_factory("another-collection")


@pytest.fixture
def deposit_user(db, deposit_collection):
    """Create/Return the test_user "test"

    """
    from swh.deposit.models import DepositClient

    try:
        user = DepositClient._default_manager.get(username=TEST_USER["username"])
    except DepositClient.DoesNotExist:
        user = DepositClient._default_manager.create_user(
            username=TEST_USER["username"],
            email=TEST_USER["email"],
            password=TEST_USER["password"],
            provider_url=TEST_USER["provider_url"],
            domain=TEST_USER["domain"],
        )
    user.collections = [deposit_collection.id]
    user.save()
    return user


@pytest.fixture
def client():
    """Override the pytest-django `client` fixture, which does not work with
    djangorestframework.

    """
    return APIClient()  # <- drf's client


@pytest.fixture
def authenticated_client(client, deposit_user):
    """Return a logged-in client.

    This also patches the client instance to keep a reference to the
    associated deposit_user.

    """
    _token = "%s:%s" % (deposit_user.username, TEST_USER["password"])
    token = base64.b64encode(_token.encode("utf-8"))
    authorization = "Basic %s" % token.decode("utf-8")
    client.credentials(HTTP_AUTHORIZATION=authorization)
    client.deposit_client = deposit_user
    yield client
    client.logout()


@pytest.fixture
def sample_archive(tmp_path):
    """Returns a sample archive

    """
    tmp_path = str(tmp_path)  # pytest version limitation in previous version
    archive = create_arborescence_archive(
        tmp_path, "archive1", "file1", b"some content in file"
    )

    return archive


@pytest.fixture
def atom_dataset(datadir) -> Mapping[str, str]:
    """Load the atom entry files from the test data directory.
    Returns:
        Dict of atom name (filename without extension) per content (str)

    """
    atom_path = os.path.join(datadir, "atom")
    data = {}
    for filename in os.listdir(atom_path):
        filepath = os.path.join(atom_path, filename)
        with open(filepath, "rb") as f:
            raw_content = f.read().decode("utf-8")

        # Keep the filename without extension
        atom_name = filename.split(".")[0]
        data[atom_name] = raw_content
    return data


def create_deposit(
    authenticated_client,
    collection_name: str,
    sample_archive,
    external_id: str,
    deposit_status=DEPOSIT_STATUS_DEPOSITED,
):
    """Create a skeleton shell deposit

    """
    url = reverse(COL_IRI, args=[collection_name])
    # when
    response = authenticated_client.post(
        url,
        content_type="application/zip",  # as zip
        data=sample_archive["data"],
        # + headers
        CONTENT_LENGTH=sample_archive["length"],
        HTTP_SLUG=external_id,
        HTTP_CONTENT_MD5=sample_archive["md5sum"],
        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
        HTTP_IN_PROGRESS="false",
        HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (sample_archive["name"]),
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED
    from swh.deposit.models import Deposit

    deposit = Deposit._default_manager.get(external_id=external_id)

    if deposit.status != deposit_status:
        deposit.status = deposit_status
        deposit.save()
    assert deposit.status == deposit_status
    return deposit


def create_binary_deposit(
    authenticated_client,
    collection_name: str,
    sample_archive,
    external_id: str,
    deposit_status: str = DEPOSIT_STATUS_DEPOSITED,
    atom_dataset: Mapping[str, bytes] = {},
):
    """Create a deposit with both metadata and archive set. Then alters its status
    to `deposit_status`.

    """
    deposit = create_deposit(
        authenticated_client,
        collection_name,
        sample_archive,
        external_id=external_id,
        deposit_status=DEPOSIT_STATUS_PARTIAL,
    )

    response = authenticated_client.post(
-        reverse(EDIT_SE_IRI, args=[collection_name, deposit.id]),
+        reverse(SE_IRI, args=[collection_name, deposit.id]),
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data0"] % deposit.external_id.encode("utf-8"),
        HTTP_SLUG=deposit.external_id,
        HTTP_IN_PROGRESS="true",
    )

    assert response.status_code == status.HTTP_201_CREATED
    assert deposit.status == DEPOSIT_STATUS_PARTIAL

    from swh.deposit.models import Deposit

    deposit = Deposit._default_manager.get(pk=deposit.id)

    if deposit.status != deposit_status:
        deposit.status = deposit_status
        deposit.save()

    assert deposit.status == deposit_status
    return deposit


def deposit_factory(deposit_status=DEPOSIT_STATUS_DEPOSITED):
    """Build deposit with a specific status

    """

    @pytest.fixture()
    def _deposit(
        sample_archive,
        deposit_collection,
        authenticated_client,
        deposit_status=deposit_status,
    ):
        external_id = "external-id-%s" % deposit_status
        return create_deposit(
            authenticated_client,
            deposit_collection.name,
            sample_archive,
            external_id=external_id,
            deposit_status=deposit_status,
        )

    return _deposit


deposited_deposit = deposit_factory()
rejected_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_REJECTED)
partial_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_PARTIAL)
verified_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_VERIFIED)
completed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS)
failed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_FAILURE)


@pytest.fixture
def partial_deposit_with_metadata(
    sample_archive, deposit_collection, authenticated_client, atom_dataset
):
    """Returns deposit with archive and metadata provided, status 'partial'

    """
    return create_binary_deposit(
        authenticated_client,
        deposit_collection.name,
        sample_archive,
        external_id="external-id-partial",
        deposit_status=DEPOSIT_STATUS_PARTIAL,
        atom_dataset=atom_dataset,
    )


@pytest.fixture
def partial_deposit_only_metadata(
    deposit_collection, authenticated_client, atom_dataset
):
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data1"],
        HTTP_SLUG="external-id-partial",
        HTTP_IN_PROGRESS=True,
    )
    assert response.status_code == status.HTTP_201_CREATED
    response_content = parse_xml(response.content)
    deposit_id = response_content["deposit_id"]
    from swh.deposit.models import Deposit

    deposit = Deposit._default_manager.get(pk=deposit_id)
    assert deposit.status == DEPOSIT_STATUS_PARTIAL
    return deposit


@pytest.fixture
def complete_deposit(sample_archive, deposit_collection, authenticated_client):
    """Returns a completed deposit (load success)

    """
    deposit = create_deposit(
        authenticated_client,
        deposit_collection.name,
        sample_archive,
        external_id="external-id-complete",
        deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS,
    )
    origin = "https://hal.archives-ouvertes.fr/hal-01727745"
    directory_id = "42a13fc721c8716ff695d0d62fc851d641f3a12b"
    revision_id = "548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10"
    snapshot_id = "e5e82d064a9c3df7464223042e0c55d72ccff7f0"
    deposit.swhid = swhid(DIRECTORY, directory_id)
    deposit.swhid_context = swhid(
        DIRECTORY,
        directory_id,
        metadata={
            "origin": origin,
            "visit": swhid(SNAPSHOT, snapshot_id),
            "anchor": swhid(REVISION, revision_id),
            "path": "/",
        },
    )
    deposit.save()
    return deposit


@pytest.fixture()
def tmp_path(tmp_path):
    return str(tmp_path)  # issue with oldstable's pytest version
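
Usage sketch (not part of the patch): assuming the fixtures above and the `entry-data0` Atom template already shipped in the test data directory, a test exercising the renamed SE-IRI route could look like the following; the test name and its placement are illustrative only.

# Hypothetical sketch: post an Atom entry to the SE-IRI of an existing partial
# deposit, mirroring create_binary_deposit() above. The fixture names
# (authenticated_client, partial_deposit, atom_dataset) come from this
# conftest.py; everything else is an assumption for illustration.
from django.urls import reverse
from rest_framework import status

from swh.deposit.config import SE_IRI


def test_post_atom_entry_on_se_iri(
    authenticated_client, partial_deposit, atom_dataset
):
    response = authenticated_client.post(
        reverse(SE_IRI, args=[partial_deposit.collection.name, partial_deposit.id]),
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data0"] % partial_deposit.external_id.encode("utf-8"),
        HTTP_SLUG=partial_deposit.external_id,
        HTTP_IN_PROGRESS="false",
    )
    assert response.status_code == status.HTTP_201_CREATED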