diff --git a/swh/deposit/api/collection.py b/swh/deposit/api/collection.py index d66ee47d..b877ccf6 100644 --- a/swh/deposit/api/collection.py +++ b/swh/deposit/api/collection.py @@ -1,175 +1,175 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Optional, Tuple from django.shortcuts import render from rest_framework import status from rest_framework.generics import ListAPIView from ..config import DEPOSIT_STATUS_LOAD_SUCCESS, EDIT_IRI from ..models import Deposit from ..parsers import ( SWHAtomEntryParser, SWHFileUploadTarParser, SWHFileUploadZipParser, SWHMultiPartParser, ) from .common import ( ACCEPT_ARCHIVE_CONTENT_TYPES, APIPost, ParsedRequestHeaders, Receipt, get_collection_by_name, ) from .utils import DefaultPagination, DepositSerializer class CollectionAPI(ListAPIView, APIPost): """Deposit request class defining api endpoints for sword deposit. What's known as 'Col-IRI' in the sword specification. HTTP verbs supported: GET and POST """ parser_classes = ( SWHMultiPartParser, SWHFileUploadZipParser, SWHFileUploadTarParser, SWHAtomEntryParser, ) serializer_class = DepositSerializer pagination_class = DefaultPagination def get(self, request, *args, **kwargs): """List the user's collection if the user has access to said collection. """ self.checks(request, kwargs["collection_name"]) paginated_result = super().get(request, *args, **kwargs) data = paginated_result.data # Build pagination link headers links = [] for link_name in ["next", "previous"]: link = data.get(link_name) if link is None: continue links.append(f'<{link}>; rel="{link_name}"') response = render( request, "deposit/collection_list.xml", context={ "count": data["count"], "results": [dict(d) for d in data["results"]], }, content_type="application/xml", status=status.HTTP_200_OK, ) - response._headers["Link"] = ",".join(links) + response["Link"] = ",".join(links) return response def get_queryset(self): """List the deposits for the authenticated user (pagination is handled by the `pagination_class` class attribute). """ return Deposit.objects.filter(client=self.request.user.id).order_by("id") def process_post( self, req, headers: ParsedRequestHeaders, collection_name: str, deposit: Optional[Deposit] = None, ) -> Tuple[int, str, Receipt]: """Create a first deposit as: - archive deposit (1 zip) - multipart (1 zip + 1 atom entry) - atom entry Args: req (Request): the request holding the information to parse and inject in db collection_name (str): the associated client Returns: An http response (HttpResponse) according to the situation. If everything is ok, a 201 response (created) with a deposit receipt. Raises: - archive deposit: - 400 (bad request) if the request is not providing an external identifier - 403 (forbidden) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or hash provided mismatch the reality of the archive. - 415 (unsupported media type) if a wrong media type is provided - multipart deposit: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided - Atom entry deposit: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ assert deposit is None deposit = self._deposit_create(req, collection_name, external_id=headers.slug) if req.content_type in ACCEPT_ARCHIVE_CONTENT_TYPES: receipt = self._binary_upload(req, headers, collection_name, deposit) elif req.content_type.startswith("multipart/"): receipt = self._multipart_upload(req, headers, collection_name, deposit) else: receipt = self._atom_entry(req, headers, collection_name, deposit) return status.HTTP_201_CREATED, EDIT_IRI, receipt def _deposit_create( self, request, collection_name: str, external_id: Optional[str] ) -> Deposit: collection = get_collection_by_name(collection_name) client = self.get_client(request) deposit_parent: Optional[Deposit] = None if external_id: # TODO: delete this when clients stopped relying on the slug try: # find a deposit parent (same external id, status load to success) deposit_parent = ( Deposit.objects.filter( client=client, external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS, ) .order_by("-id")[0:1] .get() ) except Deposit.DoesNotExist: # then no parent for that deposit, deposit_parent already None pass return Deposit( collection=collection, external_id=external_id or "", client=client, parent=deposit_parent, ) diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py index cbf97a99..56bb69a0 100644 --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,1268 +1,1268 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from abc import ABCMeta, abstractmethod import datetime import hashlib import json from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union import uuid import attr from django.core.files.uploadedfile import UploadedFile from django.http import FileResponse, HttpResponse from django.shortcuts import render from django.template.loader import render_to_string from django.urls import reverse from django.utils import timezone from rest_framework import status from rest_framework.authentication import BaseAuthentication, BasicAuthentication from rest_framework.permissions import BasePermission, IsAuthenticated from rest_framework.request import Request from rest_framework.views import APIView from swh.deposit.api.checks import check_metadata from swh.deposit.api.converters import convert_status_detail from swh.deposit.auth import HasDepositPermission, KeycloakBasicAuthentication from swh.deposit.models import Deposit from swh.deposit.utils import compute_metadata_context from swh.model import hashutil from swh.model.identifiers import ( ExtendedObjectType, ExtendedSWHID, QualifiedSWHID, ValidationError, ) from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, Origin, RawExtrinsicMetadata, ) from swh.scheduler.utils import create_oneshot_task_dict from ..config import ( ARCHIVE_KEY, ARCHIVE_TYPE, CONT_FILE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_PARTIAL, EDIT_IRI, EM_IRI, METADATA_KEY, METADATA_TYPE, RAW_METADATA_KEY, SE_IRI, STATE_IRI, APIConfig, ) from ..errors import ( BAD_REQUEST, CHECKSUM_MISMATCH, ERROR_CONTENT, FORBIDDEN, MAX_UPLOAD_SIZE_EXCEEDED, MEDIATION_NOT_ALLOWED, METHOD_NOT_ALLOWED, NOT_FOUND, PARSING_ERROR, DepositError, ParserError, ) from ..models import DepositClient, DepositCollection, DepositRequest from ..parsers import parse_xml from ..utils import extended_swhid_from_qualified, parse_swh_reference ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"] ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"] @attr.s class ParsedRequestHeaders: content_type = attr.ib(type=str) content_length = attr.ib(type=Optional[int]) in_progress = attr.ib(type=bool) content_disposition = attr.ib(type=Optional[str]) content_md5sum = attr.ib(type=Optional[bytes]) packaging = attr.ib(type=Optional[str]) slug = attr.ib(type=Optional[str]) on_behalf_of = attr.ib(type=Optional[str]) metadata_relevant = attr.ib(type=Optional[str]) swhid = attr.ib(type=Optional[str]) @attr.s class Receipt: """Data computed while handling the request body that will be served in the Deposit Receipt.""" deposit_id = attr.ib(type=int) deposit_date = attr.ib(type=datetime.datetime) status = attr.ib(type=str) archive = attr.ib(type=Optional[str]) def _compute_md5(filehandler: UploadedFile) -> bytes: h = hashlib.md5() for chunk in filehandler: h.update(chunk) # type: ignore return h.digest() def get_deposit_by_id( deposit_id: int, collection_name: Optional[str] = None ) -> Deposit: """Gets an existing Deposit object if it exists, or raises `DepositError`. If `collection` is not None, also checks the deposit belongs to the collection.""" try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: raise DepositError(NOT_FOUND, f"Deposit {deposit_id} does not exist") if collection_name and deposit.collection.name != collection_name: get_collection_by_name(collection_name) # raises if does not exist raise DepositError( NOT_FOUND, f"Deposit {deposit_id} does not belong to collection {collection_name}", ) return deposit def get_collection_by_name(collection_name: str): """Gets an existing Deposit object if it exists, or raises `DepositError`.""" try: collection = DepositCollection.objects.get(name=collection_name) except DepositCollection.DoesNotExist: raise DepositError(NOT_FOUND, f"Unknown collection name {collection_name}") assert collection is not None return collection def guess_deposit_origin_url(deposit: Deposit): """Guesses an origin url for the given deposit.""" external_id = deposit.external_id if not external_id: # The client provided neither an origin_url nor a slug. That's inconvenient, # but SWORD requires we support it. So let's generate a random slug. external_id = str(uuid.uuid4()) return "%s/%s" % (deposit.client.provider_url.rstrip("/"), external_id) def check_client_origin(client: DepositClient, origin_url: str): provider_url = client.provider_url.rstrip("/") + "/" if not origin_url.startswith(provider_url): raise DepositError( FORBIDDEN, f"Cannot create origin {origin_url}, it must start with {provider_url}", ) class APIBase(APIConfig, APIView, metaclass=ABCMeta): """Base deposit request class sharing multiple common behaviors. """ _client: Optional[DepositClient] = None def __init__(self): super().__init__() auth_provider = self.config.get("authentication_provider") if auth_provider == "basic": self.authentication_classes: Sequence[Type[BaseAuthentication]] = ( BasicAuthentication, ) self.permission_classes: Sequence[Type[BasePermission]] = (IsAuthenticated,) elif auth_provider == "keycloak": self.authentication_classes: Sequence[Type[BaseAuthentication]] = ( KeycloakBasicAuthentication, ) self.permission_classes: Sequence[Type[BasePermission]] = ( IsAuthenticated, HasDepositPermission, ) else: raise ValueError( "Configuration key 'authentication_provider' should be provided with" f"either 'basic' or 'keycloak' value not {auth_provider!r}." ) def _read_headers(self, request: Request) -> ParsedRequestHeaders: """Read and unify the necessary headers from the request (those are not stored in the same location or not properly formatted). Args: request: Input request Returns: Dictionary with the following keys (some associated values may be None): - content-type - content-length - in-progress - content-disposition - packaging - slug - on-behalf-of """ meta = request._request.META content_length = meta.get("CONTENT_LENGTH") if content_length and isinstance(content_length, str): content_length = int(content_length) # final deposit if not provided in_progress = meta.get("HTTP_IN_PROGRESS", False) if isinstance(in_progress, str): in_progress = in_progress.lower() == "true" content_md5sum = meta.get("HTTP_CONTENT_MD5") if content_md5sum: content_md5sum = bytes.fromhex(content_md5sum) return ParsedRequestHeaders( content_type=request.content_type, content_length=content_length, in_progress=in_progress, content_disposition=meta.get("HTTP_CONTENT_DISPOSITION"), content_md5sum=content_md5sum, packaging=meta.get("HTTP_PACKAGING"), slug=meta.get("HTTP_SLUG"), on_behalf_of=meta.get("HTTP_ON_BEHALF_OF"), metadata_relevant=meta.get("HTTP_METADATA_RELEVANT"), swhid=meta.get("HTTP_X_CHECK_SWHID"), ) def _deposit_put(self, deposit: Deposit, in_progress: bool = False) -> None: """Save/Update a deposit in db. Args: deposit: deposit being updated/created in_progress: deposit status """ if in_progress is False: self._complete_deposit(deposit) else: deposit.status = DEPOSIT_STATUS_PARTIAL deposit.save() def _complete_deposit(self, deposit: Deposit) -> None: """Marks the deposit as 'deposited', then schedule a check task if configured to do so.""" deposit.complete_date = timezone.now() deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() if not deposit.origin_url: deposit.origin_url = guess_deposit_origin_url(deposit) if self.config["checks"]: scheduler = self.scheduler if deposit.status == DEPOSIT_STATUS_DEPOSITED and not deposit.check_task_id: task = create_oneshot_task_dict( "check-deposit", collection=deposit.collection.name, deposit_id=deposit.id, retries_left=3, ) check_task_id = scheduler.create_tasks([task])[0]["id"] deposit.check_task_id = check_task_id deposit.save() def _deposit_request_put( self, deposit: Deposit, deposit_request_data: Dict[str, Any], replace_metadata: bool = False, replace_archives: bool = False, ) -> DepositRequest: """Save a deposit request with metadata attached to a deposit. Args: deposit: The deposit concerned by the request deposit_request_data: The dictionary with at most 2 deposit request types (archive, metadata) to associate to the deposit replace_metadata: Flag defining if we add or update existing metadata to the deposit replace_archives: Flag defining if we add or update archives to existing deposit Returns: the DepositRequest object stored in the backend """ if replace_metadata: DepositRequest.objects.filter(deposit=deposit, type=METADATA_TYPE).delete() if replace_archives: DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete() deposit_request = None archive_file = deposit_request_data.get(ARCHIVE_KEY) if archive_file: deposit_request = DepositRequest( type=ARCHIVE_TYPE, deposit=deposit, archive=archive_file ) deposit_request.save() metadata = deposit_request_data.get(METADATA_KEY) if metadata: raw_metadata = deposit_request_data[RAW_METADATA_KEY] deposit_request = DepositRequest( type=METADATA_TYPE, deposit=deposit, metadata=metadata, raw_metadata=raw_metadata.decode("utf-8"), ) deposit_request.save() assert deposit_request is not None return deposit_request def _delete_archives(self, collection_name: str, deposit: Deposit) -> Dict: """Delete archive references from the deposit id. """ DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete() return {} def _delete_deposit(self, collection_name: str, deposit: Deposit) -> Dict: """Delete deposit reference. Args: collection_name: Client's collection deposit: The deposit to delete Returns Empty dict when ok. Dict with error key to describe the failure. """ if deposit.collection.name != collection_name: summary = "Cannot delete a deposit from another collection" description = "Deposit %s does not belong to the collection %s" % ( deposit.id, collection_name, ) raise DepositError( BAD_REQUEST, summary=summary, verbose_description=description ) DepositRequest.objects.filter(deposit=deposit).delete() deposit.delete() return {} def _check_file_length( self, filehandler: UploadedFile, content_length: Optional[int] = None, ) -> None: """Check the filehandler passed as argument has exactly the expected content_length Args: filehandler: The file to check content_length: the expected length if provided. Raises: DepositError if the actual length does not match """ max_upload_size = self.config["max_upload_size"] if content_length: length = filehandler.size if length != content_length: raise DepositError(status.HTTP_412_PRECONDITION_FAILED, "Wrong length") if filehandler.size > max_upload_size: raise DepositError( MAX_UPLOAD_SIZE_EXCEEDED, f"Upload size limit exceeded (max {max_upload_size} bytes)." "Please consider sending the archive in multiple steps.", ) def _check_file_md5sum( self, filehandler: UploadedFile, md5sum: Optional[bytes], ) -> None: """Check the filehandler passed as argument has the expected md5sum Args: filehandler: The file to check md5sum: md5 hash expected from the file's content Raises: DepositError if the md5sum does not match """ if md5sum: _md5sum = _compute_md5(filehandler) if _md5sum != md5sum: raise DepositError( CHECKSUM_MISMATCH, "Wrong md5 hash", f"The checksum sent {hashutil.hash_to_hex(md5sum)} and the actual " f"checksum {hashutil.hash_to_hex(_md5sum)} does not match.", ) def _binary_upload( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Deposit, replace_metadata: bool = False, replace_archives: bool = False, ) -> Receipt: """Binary upload routine. Other than such a request, a 415 response is returned. Args: request: the request holding information to parse and inject in db headers: parsed request headers collection_name: the associated client deposit: deposit to be updated replace_metadata: 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives: 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Raises: - 400 (bad request) if the request is not providing an external identifier - 413 (request entity too large) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided """ content_length = headers.content_length if not content_length: raise DepositError( BAD_REQUEST, "CONTENT_LENGTH header is mandatory", "For archive deposit, the CONTENT_LENGTH header must be sent.", ) content_disposition = headers.content_disposition if not content_disposition: raise DepositError( BAD_REQUEST, "CONTENT_DISPOSITION header is mandatory", "For archive deposit, the CONTENT_DISPOSITION header must be sent.", ) packaging = headers.packaging if packaging and packaging not in ACCEPT_PACKAGINGS: raise DepositError( BAD_REQUEST, f"Only packaging {ACCEPT_PACKAGINGS} is supported", f"The packaging provided {packaging} is not supported", ) filehandler = request.FILES["file"] assert isinstance(filehandler, UploadedFile), filehandler self._check_file_length(filehandler, content_length) self._check_file_md5sum(filehandler, headers.content_md5sum) # actual storage of data archive_metadata = filehandler self._deposit_put( deposit=deposit, in_progress=headers.in_progress, ) self._deposit_request_put( deposit, {ARCHIVE_KEY: archive_metadata}, replace_metadata=replace_metadata, replace_archives=replace_archives, ) return Receipt( deposit_id=deposit.id, deposit_date=deposit.reception_date, status=deposit.status, archive=filehandler.name, ) def _read_metadata(self, metadata_stream) -> Tuple[bytes, Dict[str, Any]]: """Given a metadata stream, reads the metadata and returns both the parsed and the raw metadata. """ raw_metadata = metadata_stream.read() metadata = parse_xml(raw_metadata) return raw_metadata, metadata def _multipart_upload( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Deposit, replace_metadata: bool = False, replace_archives: bool = False, ) -> Receipt: """Multipart upload supported with exactly: - 1 archive (zip) - 1 atom entry Other than such a request, a 415 response is returned. Args: request: the request holding information to parse and inject in db headers: parsed request headers collection_name: the associated client deposit: deposit to be updated replace_metadata: 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives: 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Raises: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 413 (request entity too large) if the length of the archive exceeds the max size configured - 415 (unsupported media type) if a wrong media type is provided """ content_types_present = set() data: Dict[str, Optional[Any]] = { "application/zip": None, # expected either zip "application/x-tar": None, # or x-tar "application/atom+xml": None, } for key, value in request.FILES.items(): fh = value content_type = fh.content_type if content_type in content_types_present: raise DepositError( ERROR_CONTENT, "Only 1 application/zip (or application/x-tar) archive " "and 1 atom+xml entry is supported (as per sword2.0 " "specification)", "You provided more than 1 application/(zip|x-tar) " "or more than 1 application/atom+xml content-disposition " "header in the multipart deposit", ) content_types_present.add(content_type) assert content_type is not None data[content_type] = fh if len(content_types_present) != 2: raise DepositError( ERROR_CONTENT, "You must provide both 1 application/zip (or " "application/x-tar) and 1 atom+xml entry for multipart " "deposit", "You need to provide only 1 application/(zip|x-tar) " "and 1 application/atom+xml content-disposition header " "in the multipart deposit", ) filehandler = data["application/zip"] if not filehandler: filehandler = data["application/x-tar"] assert isinstance(filehandler, UploadedFile), filehandler self._check_file_length(filehandler) self._check_file_md5sum(filehandler, headers.content_md5sum) try: raw_metadata, metadata = self._read_metadata(data["application/atom+xml"]) except ParserError: raise DepositError( PARSING_ERROR, "Malformed xml metadata", "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.", ) self._set_deposit_origin_from_metadata(deposit, metadata, headers) # actual storage of data self._deposit_put( deposit=deposit, in_progress=headers.in_progress, ) deposit_request_data = { ARCHIVE_KEY: filehandler, METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } self._deposit_request_put( deposit, deposit_request_data, replace_metadata, replace_archives ) assert filehandler is not None return Receipt( deposit_id=deposit.id, deposit_date=deposit.reception_date, archive=filehandler.name, status=deposit.status, ) def _store_metadata_deposit( self, deposit: Deposit, swhid_reference: Union[str, QualifiedSWHID], metadata: Dict, raw_metadata: bytes, deposit_origin: Optional[str] = None, ) -> Tuple[ExtendedSWHID, Deposit, DepositRequest]: """When all user inputs pass the checks, this associates the raw_metadata to the swhid_reference in the raw extrinsic metadata storage. In case of any issues, a bad request response is returned to the user with the details. Checks: - metadata are technically parsable - metadata pass the functional checks - SWHID (if any) is technically valid Args: deposit: Deposit reference swhid_reference: The swhid or the origin to attach metadata information to metadata: Full dict of metadata to check for validity (parsed out of raw_metadata) raw_metadata: The actual raw metadata to send in the storage metadata deposit_origin: Optional deposit origin url to use if any (e.g. deposit update scenario provides one) Raises: DepositError in case of incorrect inputs from the deposit client (e.g. functionally invalid metadata, ...) Returns: Tuple of target swhid, deposit, and deposit request """ metadata_ok, error_details = check_metadata(metadata) if not metadata_ok: assert error_details, "Details should be set when a failure occurs" raise DepositError( BAD_REQUEST, "Functional metadata checks failure", convert_status_detail(error_details), ) metadata_authority = MetadataAuthority( type=MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit.client.provider_url, ) metadata_fetcher = self.swh_deposit_fetcher() # replace metadata within the deposit backend deposit_request_data = { METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } # actually add the metadata to the completed deposit deposit_request = self._deposit_request_put(deposit, deposit_request_data) target_swhid: ExtendedSWHID # origin URL or CoreSWHID if isinstance(swhid_reference, str): target_swhid = Origin(swhid_reference).swhid() metadata_context = {} else: metadata_context = compute_metadata_context(swhid_reference) if deposit_origin: # metadata deposit update on completed deposit metadata_context["origin"] = deposit_origin target_swhid = extended_swhid_from_qualified(swhid_reference) self._check_swhid_in_archive(target_swhid) # metadata deposited by the client metadata_object = RawExtrinsicMetadata( target=target_swhid, # core swhid or origin discovery_date=deposit_request.date, authority=metadata_authority, fetcher=metadata_fetcher, format="sword-v2-atom-codemeta", metadata=raw_metadata, **metadata_context, ) # metadata on the metadata object swh_deposit_authority = self.swh_deposit_authority() swh_deposit_fetcher = self.swh_deposit_fetcher() metametadata_object = RawExtrinsicMetadata( target=metadata_object.swhid(), discovery_date=deposit_request.date, authority=swh_deposit_authority, fetcher=swh_deposit_fetcher, format="xml-deposit-info", metadata=render_to_string( "deposit/deposit_info.xml", context={"deposit": deposit} ).encode(), ) # write to metadata storage self.storage_metadata.metadata_authority_add( [metadata_authority, swh_deposit_authority] ) self.storage_metadata.metadata_fetcher_add( [metadata_fetcher, swh_deposit_fetcher] ) self.storage_metadata.raw_extrinsic_metadata_add( [metadata_object, metametadata_object] ) return (target_swhid, deposit, deposit_request) def _check_swhid_in_archive(self, target_swhid: ExtendedSWHID) -> None: """Check the target object already exists in the archive, and raises a BAD_REQUEST if it does not.""" if target_swhid.object_type in (ExtendedObjectType.CONTENT,): if list( self.storage.content_missing_per_sha1_git([target_swhid.object_id]) ): raise DepositError( BAD_REQUEST, f"Cannot load metadata on {target_swhid}, this content " f"object does not exist in the archive (yet?).", ) elif target_swhid.object_type in ( ExtendedObjectType.DIRECTORY, ExtendedObjectType.REVISION, ExtendedObjectType.RELEASE, ExtendedObjectType.SNAPSHOT, ): target_type_name = target_swhid.object_type.name.lower() method = getattr(self.storage, target_type_name + "_missing") if list(method([target_swhid.object_id])): raise DepositError( BAD_REQUEST, f"Cannot load metadata on {target_swhid}, this {target_type_name} " f"object does not exist in the archive (yet?).", ) elif target_swhid.object_type in (ExtendedObjectType.ORIGIN,): if None in list(self.storage.origin_get_by_sha1([target_swhid.object_id])): raise DepositError( BAD_REQUEST, "Cannot load metadata on origin, it is not (yet?) known to the " "archive.", ) else: # This should not happen, because target_swhid is generated from either # a core swhid or an origin URL. # Let's just check it again so the "switch" is exhaustive. raise ValueError( f"_check_swhid_in_archive expected core SWHID or origin SWHID, " f"but got {target_swhid}." ) def _atom_entry( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Deposit, replace_metadata: bool = False, replace_archives: bool = False, ) -> Receipt: """Atom entry deposit. Args: request: the request holding information to parse and inject in db headers: parsed request headers collection_name: the associated client deposit: deposit to be updated replace_metadata: 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives: 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Raises: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ try: raw_metadata, metadata = self._read_metadata(request.data) except ParserError: raise DepositError( BAD_REQUEST, "Malformed xml metadata", "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.", ) if metadata is None: raise DepositError( BAD_REQUEST, "Empty body request is not supported", "Atom entry deposit is supposed to send for metadata. " "If the body is empty, there is no metadata.", ) self._set_deposit_origin_from_metadata(deposit, metadata, headers) # Determine if we are in the metadata-only deposit case try: swhid_ref = parse_swh_reference(metadata) except ValidationError as e: raise DepositError( PARSING_ERROR, "Invalid SWHID reference", str(e), ) if swhid_ref is not None and ( deposit.origin_url or deposit.parent or deposit.external_id ): raise DepositError( BAD_REQUEST, " is for metadata-only deposits and " " / / Slug are for " "code deposits, only one may be used on a given deposit.", ) if swhid_ref is not None: deposit.save() # We need a deposit id target_swhid, depo, depo_request = self._store_metadata_deposit( deposit, swhid_ref, metadata, raw_metadata ) deposit.status = DEPOSIT_STATUS_LOAD_SUCCESS if isinstance(swhid_ref, QualifiedSWHID): deposit.swhid = str(extended_swhid_from_qualified(swhid_ref)) deposit.swhid_context = str(swhid_ref) deposit.complete_date = depo_request.date deposit.reception_date = depo_request.date deposit.save() return Receipt( deposit_id=deposit.id, deposit_date=depo_request.date, status=deposit.status, archive=None, ) self._deposit_put( deposit=deposit, in_progress=headers.in_progress, ) self._deposit_request_put( deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, replace_metadata, replace_archives, ) return Receipt( deposit_id=deposit.id, deposit_date=deposit.reception_date, status=deposit.status, archive=None, ) def _set_deposit_origin_from_metadata(self, deposit, metadata, headers): create_origin = metadata.get("swh:deposit", {}).get("swh:create_origin") add_to_origin = metadata.get("swh:deposit", {}).get("swh:add_to_origin") if create_origin and add_to_origin: raise DepositError( BAD_REQUEST, " and are mutually exclusive, " "as they respectively create a new origin and add to an existing " "origin.", ) if create_origin: origin_url = create_origin["swh:origin"]["@url"] check_client_origin(deposit.client, origin_url) deposit.origin_url = origin_url if add_to_origin: origin_url = add_to_origin["swh:origin"]["@url"] check_client_origin(deposit.client, origin_url) deposit.parent = ( Deposit.objects.filter( client=deposit.client, origin_url=origin_url, status=DEPOSIT_STATUS_LOAD_SUCCESS, ) .order_by("-id")[0:1] .get() ) deposit.origin_url = origin_url if "atom:external_identifier" in metadata: # Deprecated tag. # When clients stopped using it, this should raise an error # unconditionally if deposit.origin_url: raise DepositError( BAD_REQUEST, " is deprecated, you should only use " " and from now on.", ) if headers.slug and metadata["atom:external_identifier"] != headers.slug: raise DepositError( BAD_REQUEST, "The tag and Slug header are deprecated, " " or " "should be used instead.", ) def _empty_post( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Deposit, ) -> Receipt: """Empty post to finalize a deposit. Args: request: the request holding information to parse and inject in db headers: parsed request headers collection_name: the associated client deposit: deposit to be finalized """ self._complete_deposit(deposit) assert deposit.complete_date is not None return Receipt( deposit_id=deposit.id, deposit_date=deposit.complete_date, status=deposit.status, archive=None, ) def additional_checks( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Optional[Deposit], ) -> Dict[str, Any]: """Permit the child class to enrich additional checks. Returns: dict with 'error' detailing the problem. """ return {} def get_client(self, request) -> DepositClient: # This class depends on AuthenticatedAPIView, so request.user.username # is always set username = request.user.username assert username is not None if self._client is None: try: self._client = DepositClient.objects.get( # type: ignore username=username ) except DepositClient.DoesNotExist: raise DepositError(NOT_FOUND, f"Unknown client name {username}") assert self._client.username == username return self._client def checks( self, request: Request, collection_name: str, deposit: Optional[Deposit] = None ) -> ParsedRequestHeaders: if deposit is None: collection = get_collection_by_name(collection_name) else: assert collection_name == deposit.collection.name collection = deposit.collection client = self.get_client(request) collection_id = collection.id collections = client.collections assert collections is not None if collection_id not in collections: raise DepositError( FORBIDDEN, f"Client {client.username} cannot access collection {collection_name}", ) headers = self._read_headers(request) if deposit is not None: self.restrict_access(request, headers, deposit) if headers.on_behalf_of: raise DepositError(MEDIATION_NOT_ALLOWED, "Mediation is not supported.") self.additional_checks(request, headers, collection_name, deposit) return headers def restrict_access( self, request: Request, headers: ParsedRequestHeaders, deposit: Deposit ) -> None: """Allow modifications on deposit with status 'partial' only, reject the rest. """ if request.method != "GET" and deposit.status != DEPOSIT_STATUS_PARTIAL: summary = "You can only act on deposit with status '%s'" % ( DEPOSIT_STATUS_PARTIAL, ) description = f"This deposit has status '{deposit.status}'" raise DepositError( BAD_REQUEST, summary=summary, verbose_description=description ) def _basic_not_allowed_method(self, request: Request, method: str): raise DepositError( METHOD_NOT_ALLOWED, f"{method} method is not supported on this endpoint", ) def get( self, request: Request, *args, **kwargs ) -> Union[HttpResponse, FileResponse]: return self._basic_not_allowed_method(request, "GET") def post(self, request: Request, *args, **kwargs) -> HttpResponse: return self._basic_not_allowed_method(request, "POST") def put(self, request: Request, *args, **kwargs) -> HttpResponse: return self._basic_not_allowed_method(request, "PUT") def delete(self, request: Request, *args, **kwargs) -> HttpResponse: return self._basic_not_allowed_method(request, "DELETE") class APIGet(APIBase, metaclass=ABCMeta): """Mixin for class to support GET method. """ def get( # type: ignore self, request: Request, collection_name: str, deposit_id: int ) -> Union[HttpResponse, FileResponse]: """Endpoint to create/add resources to deposit. Returns: 200 response when no error during routine occurred 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ deposit = get_deposit_by_id(deposit_id, collection_name) self.checks(request, collection_name, deposit) r = self.process_get(request, collection_name, deposit) status, content, content_type = r if content_type == "swh/generator": with content as path: return FileResponse( open(path, "rb"), status=status, content_type="application/tar" ) if content_type == "application/json": return HttpResponse( json.dumps(content), status=status, content_type=content_type ) return HttpResponse(content, status=status, content_type=content_type) @abstractmethod def process_get( self, request: Request, collection_name: str, deposit: Deposit ) -> Tuple[int, Any, str]: """Routine to deal with the deposit's get processing. Returns: Tuple status, stream of content, content-type """ pass class APIPost(APIBase, metaclass=ABCMeta): """Mixin for class to support POST method. """ def post( # type: ignore self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: """Endpoint to create/add resources to deposit. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ if deposit_id is None: deposit = None else: deposit = get_deposit_by_id(deposit_id, collection_name) headers = self.checks(request, collection_name, deposit) status, iri_key, receipt = self.process_post( request, headers, collection_name, deposit ) return self._make_deposit_receipt( request, collection_name, status, iri_key, receipt, ) def _make_deposit_receipt( self, request, collection_name: str, status: int, iri_key: str, receipt: Receipt, ) -> HttpResponse: """Returns an HttpResponse with a SWORD Deposit receipt as content.""" # Build the IRIs in the receipt args = [collection_name, receipt.deposit_id] iris = { iri: request.build_absolute_uri(reverse(iri, args=args)) for iri in [EM_IRI, EDIT_IRI, CONT_FILE_IRI, SE_IRI, STATE_IRI] } context = { **attr.asdict(receipt), **iris, "packagings": ACCEPT_PACKAGINGS, } response = render( request, "deposit/deposit_receipt.xml", context=context, content_type="application/xml", status=status, ) - response._headers["location"] = "Location", iris[iri_key] # type: ignore + response["Location"] = iris[iri_key] return response @abstractmethod def process_post( self, request, headers: ParsedRequestHeaders, collection_name: str, deposit: Optional[Deposit] = None, ) -> Tuple[int, str, Receipt]: """Routine to deal with the deposit's processing. Returns Tuple of: - response status code (200, 201, etc...) - key iri (EM_IRI, EDIT_IRI, etc...) - Receipt """ pass class APIPut(APIBase, metaclass=ABCMeta): """Mixin for class to support PUT method. """ def put( # type: ignore self, request: Request, collection_name: str, deposit_id: int ) -> HttpResponse: """Endpoint to update deposit resources. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ if deposit_id is None: deposit = None else: deposit = get_deposit_by_id(deposit_id, collection_name) headers = self.checks(request, collection_name, deposit) self.process_put(request, headers, collection_name, deposit) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_put( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit: Deposit, ) -> None: """Routine to deal with updating a deposit in some way. Returns dictionary of the processing result """ pass class APIDelete(APIBase, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def delete( # type: ignore self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: """Endpoint to delete some deposit's resources (archives, deposit). Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ assert deposit_id is not None deposit = get_deposit_by_id(deposit_id, collection_name) self.checks(request, collection_name, deposit) self.process_delete(request, collection_name, deposit) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_delete( self, request: Request, collection_name: str, deposit: Deposit ) -> None: """Routine to delete a resource. This is mostly not allowed except for the EM_IRI (cf. .api.deposit_update.APIUpdateArchive) """ pass diff --git a/swh/deposit/tests/api/test_collection_list.py b/swh/deposit/tests/api/test_collection_list.py index 248216ae..42d25539 100644 --- a/swh/deposit/tests/api/test_collection_list.py +++ b/swh/deposit/tests/api/test_collection_list.py @@ -1,113 +1,113 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from io import BytesIO from django.urls import reverse_lazy as reverse from requests.utils import parse_header_links from rest_framework import status from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL from swh.deposit.models import DepositCollection from swh.deposit.parsers import parse_xml def test_deposit_collection_list_is_auth_protected(anonymous_client): """Deposit list should require authentication """ url = reverse(COL_IRI, args=("test",)) response = anonymous_client.get(url) assert response.status_code == status.HTTP_401_UNAUTHORIZED assert b"protected by basic authentication" in response.content def test_deposit_collection_list_collection_access_restricted_to_user_coll( deposit_another_collection, deposit_user, authenticated_client ): """Deposit list api should restrict access to user's collection """ collection_id = authenticated_client.deposit_client.collections[0] coll = DepositCollection.objects.get(pk=collection_id) # authenticated_client has access to the "coll" collection coll2 = deposit_another_collection assert coll.name != coll2.name # but does not have access to that coll2 collection url = reverse(COL_IRI, args=(coll2.name,)) response = authenticated_client.get(url) # so it gets rejected access to the listing of that coll2 collection assert response.status_code == status.HTTP_403_FORBIDDEN msg = f"{deposit_user.username} cannot access collection {coll2.name}" assert msg in response.content.decode("utf-8") def test_deposit_collection_list_nominal( partial_deposit, deposited_deposit, authenticated_client ): """Deposit list api should return the user deposits in a paginated way """ client_id = authenticated_client.deposit_client.id assert partial_deposit.client.id == client_id assert deposited_deposit.client.id == client_id # Both deposit were deposited by the authenticated client # so requesting the listing of the deposits, both should be listed deposit_id = str(partial_deposit.id) deposit_id2 = str(deposited_deposit.id) coll = partial_deposit.collection # requesting the listing of the deposit for the user's collection url = reverse(COL_IRI, args=(coll.name,)) response = authenticated_client.get(f"{url}?page_size=1") assert response.status_code == status.HTTP_200_OK data = parse_xml(BytesIO(response.content))["atom:feed"] assert ( data["swh:count"] == "2" ) # total result of 2 deposits if consuming all results - header_link = parse_header_links(response._headers["Link"]) + header_link = parse_header_links(response["Link"]) assert len(header_link) == 1 # only 1 next link expected_next = f"{url}?page=2&page_size=1" assert header_link[0]["url"].endswith(expected_next) assert header_link[0]["rel"] == "next" # only one deposit in the response deposit = data["atom:entry"] # dict as only 1 value (a-la js) assert isinstance(deposit, dict) assert deposit["swh:id"] == deposit_id assert deposit["swh:status"] == DEPOSIT_STATUS_PARTIAL # then 2nd page response2 = authenticated_client.get(expected_next) assert response2.status_code == status.HTTP_200_OK data2 = parse_xml(BytesIO(response2.content))["atom:feed"] assert data2["swh:count"] == "2" # still total of 2 deposits across all results expected_previous = f"{url}?page_size=1" - header_link2 = parse_header_links(response2._headers["Link"]) + header_link2 = parse_header_links(response2["Link"]) assert len(header_link2) == 1 # only 1 previous link assert header_link2[0]["url"].endswith(expected_previous) assert header_link2[0]["rel"] == "previous" # only 1 deposit in the response deposit2 = data2["atom:entry"] # dict as only 1 value (a-la js) assert isinstance(deposit2, dict) assert deposit2["swh:id"] == deposit_id2 assert deposit2["swh:status"] == DEPOSIT_STATUS_DEPOSITED # Retrieve every deposit in one query (no page_size parameter) response3 = authenticated_client.get(url) assert response3.status_code == status.HTTP_200_OK data3 = parse_xml(BytesIO(response3.content))["atom:feed"] assert data3["swh:count"] == "2" # total result of 2 deposits across all results deposits3 = data3["atom:entry"] # list here assert isinstance(deposits3, list) assert len(deposits3) == 2 - header_link3 = parse_header_links(response3._headers["Link"]) + header_link3 = parse_header_links(response3["Link"]) assert header_link3 == [] # no pagination as all results received in one round assert deposit in deposits3 assert deposit2 in deposits3 diff --git a/swh/deposit/tests/api/test_collection_post_binary.py b/swh/deposit/tests/api/test_collection_post_binary.py index b3e8cacb..6c1e7638 100644 --- a/swh/deposit/tests/api/test_collection_post_binary.py +++ b/swh/deposit/tests/api/test_collection_post_binary.py @@ -1,339 +1,336 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Tests the handling of the binary content when doing a POST Col-IRI.""" from io import BytesIO import uuid from django.urls import reverse_lazy as reverse import pytest from rest_framework import status from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED from swh.deposit.models import Deposit, DepositRequest from swh.deposit.parsers import parse_xml from swh.deposit.tests.common import ( check_archive, create_arborescence_archive, post_archive, ) def test_post_deposit_binary_no_slug( authenticated_client, deposit_collection, sample_archive, deposit_user, mocker ): """Posting a binary deposit without slug header should generate one """ id_ = str(uuid.uuid4()) mocker.patch("uuid.uuid4", return_value=id_) url = reverse(COL_IRI, args=[deposit_collection.name]) # when response = post_archive( authenticated_client, url, sample_archive, in_progress="false", ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content["swh:deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.collection == deposit_collection assert deposit.origin_url == deposit_user.provider_url + id_ assert deposit.status == DEPOSIT_STATUS_DEPOSITED def test_post_deposit_binary_support( authenticated_client, deposit_collection, sample_archive ): """Binary upload with content-type not in [zip,x-tar] should return 415 """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) external_id = "some-external-id-1" # when response = authenticated_client.post( url, sample_archive, HTTP_SLUG=external_id, content_type="application/octet-stream", HTTP_IN_PROGRESS="false", ) # then assert response.status_code == status.HTTP_415_UNSUPPORTED_MEDIA_TYPE with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_binary_upload_ok( authenticated_client, deposit_collection, sample_archive ): """Binary upload with correct headers should return 201 with receipt """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) external_id = "some-external-id-1" # when response = post_archive( authenticated_client, url, sample_archive, HTTP_SLUG=external_id, HTTP_IN_PROGRESS="false", ) # then response_content = parse_xml(BytesIO(response.content)) assert response.status_code == status.HTTP_201_CREATED deposit_id = response_content["swh:deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_DEPOSITED assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swhid is None deposit_request = DepositRequest.objects.get(deposit=deposit) check_archive(sample_archive["name"], deposit_request.archive.name) assert deposit_request.metadata is None assert deposit_request.raw_metadata is None response_content = parse_xml(BytesIO(response.content)) assert response_content["swh:deposit_archive"] == sample_archive["name"] assert int(response_content["swh:deposit_id"]) == deposit.id assert response_content["swh:deposit_status"] == deposit.status # deprecated tags assert response_content["atom:deposit_archive"] == sample_archive["name"] assert int(response_content["atom:deposit_id"]) == deposit.id assert response_content["atom:deposit_status"] == deposit.status from django.urls import reverse as reverse_strict edit_iri = reverse_strict("edit_iri", args=[deposit_collection.name, deposit.id]) - assert response._headers["location"] == ( - "Location", - "http://testserver" + edit_iri, - ) + assert response["location"] == f"http://testserver{edit_iri}" def test_post_deposit_binary_failure_unsupported_packaging_header( authenticated_client, deposit_collection, sample_archive ): """Bin deposit without supported content_disposition header returns 400 """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) external_id = "some-external-id" # when response = post_archive( authenticated_client, url, sample_archive, HTTP_SLUG=external_id, HTTP_PACKAGING="something-unsupported", ) # then assert response.status_code == status.HTTP_400_BAD_REQUEST assert ( b"The packaging provided something-unsupported is not supported" in response.content ) with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_binary_upload_no_content_disposition_header( authenticated_client, deposit_collection, sample_archive ): """Binary upload without content_disposition header should return 400 """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) external_id = "some-external-id" # when response = post_archive( authenticated_client, url, sample_archive, HTTP_SLUG=external_id, HTTP_IN_PROGRESS="false", HTTP_CONTENT_DISPOSITION=None, ) # then assert response.status_code == status.HTTP_400_BAD_REQUEST assert b"CONTENT_DISPOSITION header is mandatory" in response.content with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_mediation_not_supported( authenticated_client, deposit_collection, sample_archive ): """Binary upload with mediation should return a 412 response """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) external_id = "some-external-id-1" # when response = post_archive( authenticated_client, url, sample_archive, HTTP_SLUG=external_id, HTTP_IN_PROGRESS="false", HTTP_ON_BEHALF_OF="someone", ) # then assert response.status_code == status.HTTP_412_PRECONDITION_FAILED with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_binary_upload_fail_if_upload_size_limit_exceeded( authenticated_client, deposit_collection, sample_archive, tmp_path ): """Binary upload must not exceed the limit set up... """ tmp_path = str(tmp_path) url = reverse(COL_IRI, args=[deposit_collection.name]) archive = create_arborescence_archive( tmp_path, "archive2", "file2", b"some content in file", up_to_size=500 ) external_id = "some-external-id" # when response = post_archive( authenticated_client, url, archive, HTTP_SLUG=external_id, HTTP_IN_PROGRESS="false", ) # then assert response.status_code == status.HTTP_413_REQUEST_ENTITY_TOO_LARGE assert b"Upload size limit exceeded" in response.content with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_binary_upload_fail_if_content_length_missing( authenticated_client, deposit_collection, sample_archive, tmp_path ): """The Content-Length header is mandatory """ tmp_path = str(tmp_path) url = reverse(COL_IRI, args=[deposit_collection.name]) archive = create_arborescence_archive( tmp_path, "archive2", "file2", b"some content in file", up_to_size=500 ) external_id = "some-external-id" # when response = post_archive( authenticated_client, url, archive, CONTENT_LENGTH=None, HTTP_SLUG=external_id, HTTP_IN_PROGRESS="false", ) # then assert response.status_code == status.HTTP_400_BAD_REQUEST assert b"the CONTENT_LENGTH header must be sent." in response.content with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) def test_post_deposit_2_post_2_different_deposits( authenticated_client, deposit_collection, sample_archive ): """2 posting deposits should return 2 different 201 with receipt """ url = reverse(COL_IRI, args=[deposit_collection.name]) # when response = post_archive( authenticated_client, url, sample_archive, HTTP_SLUG="some-external-id-1", HTTP_IN_PROGRESS="false", ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content["swh:deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) deposits = Deposit.objects.all() assert len(deposits) == 1 assert deposits[0] == deposit # second post response = post_archive( authenticated_client, url, sample_archive, content_type="application/x-tar", HTTP_SLUG="another-external-id", HTTP_IN_PROGRESS="false", ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id2 = response_content["swh:deposit_id"] deposit2 = Deposit.objects.get(pk=deposit_id2) assert deposit != deposit2 deposits = Deposit.objects.all().order_by("id") assert len(deposits) == 2 assert list(deposits), [deposit == deposit2] diff --git a/swh/deposit/tests/api/test_collection_post_multipart.py b/swh/deposit/tests/api/test_collection_post_multipart.py index 18e6dd61..5dabe829 100644 --- a/swh/deposit/tests/api/test_collection_post_multipart.py +++ b/swh/deposit/tests/api/test_collection_post_multipart.py @@ -1,386 +1,386 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Tests handling of multipart requests to POST Col-IRI.""" from io import BytesIO import uuid from django.core.files.uploadedfile import InMemoryUploadedFile from django.urls import reverse_lazy as reverse import pytest from rest_framework import status from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED from swh.deposit.models import Deposit, DepositRequest from swh.deposit.parsers import parse_xml from swh.deposit.tests.common import check_archive, post_multipart def test_post_deposit_multipart( authenticated_client, deposit_collection, atom_dataset, mocker, deposit_user, sample_archive, ): # given external_id = "foobar" origin_url = deposit_user.provider_url + external_id url = reverse(COL_IRI, args=[deposit_collection.name]) data_atom_entry = atom_dataset["entry-data0"] % origin_url # when response = post_multipart( authenticated_client, url, sample_archive, data_atom_entry, HTTP_IN_PROGRESS="false", ) print(response.content.decode()) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content["swh:deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.collection == deposit_collection assert deposit.origin_url == origin_url assert deposit.status == DEPOSIT_STATUS_DEPOSITED def test_post_deposit_multipart_without_origin_url( authenticated_client, deposit_collection, atom_dataset, mocker, deposit_user, sample_archive, ): # given url = reverse(COL_IRI, args=[deposit_collection.name]) data_atom_entry = atom_dataset["entry-data-deposit-binary"] id_ = str(uuid.uuid4()) mocker.patch("uuid.uuid4", return_value=id_) # when response = post_multipart( authenticated_client, url, sample_archive, data_atom_entry, HTTP_IN_PROGRESS="false", ) assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content["swh:deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.collection == deposit_collection assert deposit.origin_url == deposit_user.provider_url + id_ assert deposit.status == DEPOSIT_STATUS_DEPOSITED def test_post_deposit_multipart_zip( authenticated_client, deposit_collection, atom_dataset, sample_archive ): """one multipart deposit (zip+xml) should be accepted """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) data_atom_entry = atom_dataset["entry-data-deposit-binary"] external_id = "external-id" # when response = post_multipart( authenticated_client, url, sample_archive, data_atom_entry, HTTP_IN_PROGRESS="false", HTTP_SLUG=external_id, ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content["swh:deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_DEPOSITED assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swhid is None deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert len(deposit_requests) == 2 for deposit_request in deposit_requests: assert deposit_request.deposit == deposit if deposit_request.type == "archive": check_archive(sample_archive["name"], deposit_request.archive.name) assert deposit_request.metadata is None assert deposit_request.raw_metadata is None else: assert ( deposit_request.metadata["atom:id"] == "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a" ) assert deposit_request.raw_metadata == data_atom_entry def test_post_deposit_multipart_tar( authenticated_client, deposit_collection, atom_dataset, sample_archive ): """one multipart deposit (tar+xml) should be accepted """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) data_atom_entry = atom_dataset["entry-data-deposit-binary"] external_id = "external-id" # when response = post_multipart( authenticated_client, url, sample_archive, data_atom_entry, HTTP_IN_PROGRESS="false", HTTP_SLUG=external_id, ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content["swh:deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_DEPOSITED assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swhid is None deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert len(deposit_requests) == 2 for deposit_request in deposit_requests: assert deposit_request.deposit == deposit if deposit_request.type == "archive": check_archive(sample_archive["name"], deposit_request.archive.name) assert deposit_request.metadata is None assert deposit_request.raw_metadata is None else: assert ( deposit_request.metadata["atom:id"] == "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a" ) assert deposit_request.raw_metadata == data_atom_entry def test_post_deposit_multipart_put_to_replace_metadata( authenticated_client, deposit_collection, atom_dataset, sample_archive ): """One multipart deposit followed by a metadata update should be accepted """ # given url = reverse(COL_IRI, args=[deposit_collection.name]) data_atom_entry = atom_dataset["entry-data-deposit-binary"] external_id = "external-id" # when response = post_multipart( authenticated_client, url, sample_archive, data_atom_entry, HTTP_IN_PROGRESS="true", HTTP_SLUG=external_id, ) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content["swh:deposit_id"] deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == "partial" assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swhid is None deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert len(deposit_requests) == 2 for deposit_request in deposit_requests: assert deposit_request.deposit == deposit if deposit_request.type == "archive": check_archive(sample_archive["name"], deposit_request.archive.name) else: assert ( deposit_request.metadata["atom:id"] == "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a" ) assert deposit_request.raw_metadata == data_atom_entry - replace_metadata_uri = response._headers["location"][1] + replace_metadata_uri = response["location"] response = authenticated_client.put( replace_metadata_uri, content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data-deposit-binary"], HTTP_IN_PROGRESS="false", ) assert response.status_code == status.HTTP_204_NO_CONTENT # deposit_id did not change deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_DEPOSITED assert deposit.external_id == external_id assert deposit.collection == deposit_collection assert deposit.swhid is None deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert len(deposit_requests) == 2 for deposit_request in deposit_requests: assert deposit_request.deposit == deposit if deposit_request.type == "archive": check_archive(sample_archive["name"], deposit_request.archive.name) else: assert ( deposit_request.metadata["atom:id"] == "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a" ) assert ( deposit_request.raw_metadata == atom_dataset["entry-data-deposit-binary"] ) # FAILURE scenarios def test_post_deposit_multipart_only_archive_and_atom_entry( authenticated_client, deposit_collection ): """Multipart deposit only accepts one archive and one atom+xml""" # given url = reverse(COL_IRI, args=[deposit_collection.name]) archive_content = b"some content representing archive" archive = InMemoryUploadedFile( BytesIO(archive_content), field_name="archive0", name="archive0", content_type="application/x-tar", size=len(archive_content), charset=None, ) other_archive_content = b"some-other-content" other_archive = InMemoryUploadedFile( BytesIO(other_archive_content), field_name="atom0", name="atom0", content_type="application/x-tar", size=len(other_archive_content), charset="utf-8", ) # when response = authenticated_client.post( url, format="multipart", data={"archive": archive, "atom_entry": other_archive,}, # + headers HTTP_IN_PROGRESS="false", HTTP_SLUG="external-id", ) # then assert response.status_code == status.HTTP_415_UNSUPPORTED_MEDIA_TYPE assert ( "Only 1 application/zip (or application/x-tar) archive" in response.content.decode("utf-8") ) # when archive.seek(0) response = authenticated_client.post( url, format="multipart", data={"archive": archive,}, # + headers HTTP_IN_PROGRESS="false", HTTP_SLUG="external-id", ) # then assert response.status_code == status.HTTP_415_UNSUPPORTED_MEDIA_TYPE assert ( "You must provide both 1 application/zip (or " "application/x-tar) and 1 atom+xml entry for " "multipart deposit" in response.content.decode("utf-8") ) is True def test_post_deposit_multipart_400_when_badly_formatted_xml( authenticated_client, deposit_collection, sample_archive, atom_dataset ): # given url = reverse(COL_IRI, args=[deposit_collection.name]) data_atom_entry_ko = atom_dataset["entry-data-ko"] # when response = post_multipart( authenticated_client, url, sample_archive, data_atom_entry_ko, HTTP_IN_PROGRESS="false", HTTP_SLUG="external-id", ) assert b"Malformed xml metadata" in response.content assert response.status_code == status.HTTP_400_BAD_REQUEST def test_post_deposit_multipart_if_upload_size_limit_exceeded( authenticated_client, deposit_collection, atom_dataset, sample_archive ): # given url = reverse(COL_IRI, args=[deposit_collection.name]) archive = { **sample_archive, "data": sample_archive["data"] * 8, } data_atom_entry = atom_dataset["entry-data-deposit-binary"] external_id = "external-id" # when response = post_multipart( authenticated_client, url, archive, data_atom_entry, HTTP_IN_PROGRESS="false", HTTP_SLUG=external_id, ) # then assert response.status_code == status.HTTP_413_REQUEST_ENTITY_TOO_LARGE assert b"Upload size limit exceeded" in response.content with pytest.raises(Deposit.DoesNotExist): Deposit.objects.get(external_id=external_id) diff --git a/swh/deposit/tests/api/test_deposit_private_read_archive.py b/swh/deposit/tests/api/test_deposit_private_read_archive.py index 3fd8c3ae..3ec0c186 100644 --- a/swh/deposit/tests/api/test_deposit_private_read_archive.py +++ b/swh/deposit/tests/api/test_deposit_private_read_archive.py @@ -1,106 +1,106 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from os.path import exists, join import tarfile from django.urls import reverse_lazy as reverse from rest_framework import status from swh.deposit.api.private.deposit_read import aggregate_tarballs from swh.deposit.config import EM_IRI, PRIVATE_GET_RAW_CONTENT from swh.deposit.tests.common import create_arborescence_archive PRIVATE_GET_RAW_CONTENT_NC = PRIVATE_GET_RAW_CONTENT + "-nc" def private_get_raw_url_endpoints(collection, deposit): """There are 2 endpoints to check (one with collection, one without)""" return [ reverse(PRIVATE_GET_RAW_CONTENT, args=[collection.name, deposit.id]), reverse(PRIVATE_GET_RAW_CONTENT_NC, args=[deposit.id]), ] def test_access_to_existing_deposit_with_one_archive( authenticated_client, deposit_collection, complete_deposit, sample_archive, tmp_path, ): """Access to deposit should stream a 200 response with its raw content """ deposit = complete_deposit for i, url in enumerate(private_get_raw_url_endpoints(deposit_collection, deposit)): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK - assert response._headers["content-type"][1] == "application/tar" + assert response["content-type"] == "application/tar" # write the response stream in a temporary archive archive_path = join(tmp_path, f"archive_{i}.tar") with open(archive_path, "wb") as f: for chunk in response.streaming_content: f.write(chunk) # to check its properties are correct tfile = tarfile.open(archive_path) assert set(tfile.getnames()) == {".", "./file1"} assert tfile.extractfile("./file1").read() == b"some content in file" def test_access_to_existing_deposit_with_multiple_archives( tmp_path, authenticated_client, deposit_collection, partial_deposit, sample_archive ): """Access to deposit should stream a 200 response with its raw contents """ deposit = partial_deposit archive2 = create_arborescence_archive( tmp_path, "archive2", "file2", b"some other content in file" ) # Add a second archive to deposit update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) response = authenticated_client.post( update_uri, content_type="application/zip", # as zip data=archive2["data"], # + headers CONTENT_LENGTH=archive2["length"], HTTP_SLUG=deposit.external_id, HTTP_CONTENT_MD5=archive2["md5sum"], HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", HTTP_IN_PROGRESS="false", HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],), ) assert response.status_code == status.HTTP_201_CREATED for i, url in enumerate(private_get_raw_url_endpoints(deposit_collection, deposit)): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK - assert response._headers["content-type"][1] == "application/tar" + assert response["content-type"] == "application/tar" # write the response stream in a temporary archive archive_path = join(tmp_path, f"archive_{i}.tar") with open(archive_path, "wb") as f: for chunk in response.streaming_content: f.write(chunk) # to check its properties are correct tfile = tarfile.open(archive_path) assert set(tfile.getnames()) == {".", "./file1", "./file2"} assert tfile.extractfile("./file1").read() == b"some content in file" assert tfile.extractfile("./file2").read() == b"some other content in file" def test_aggregate_tarballs_with_strange_archive(datadir, tmp_path): archive = join(datadir, "archives", "single-artifact-package.tar.gz") with aggregate_tarballs(tmp_path, [archive]) as tarball_path: assert exists(tarball_path) diff --git a/swh/deposit/tests/api/test_deposit_private_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py index c13c5ab3..4d021341 100644 --- a/swh/deposit/tests/api/test_deposit_private_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py @@ -1,398 +1,398 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import reverse_lazy as reverse from rest_framework import status from swh.deposit import __version__, utils from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA, SE_IRI, SWH_PERSON from swh.deposit.models import Deposit from swh.deposit.parsers import parse_xml PRIVATE_GET_DEPOSIT_METADATA_NC = PRIVATE_GET_DEPOSIT_METADATA + "-nc" def private_get_raw_url_endpoints(collection, deposit): """There are 2 endpoints to check (one with collection, one without)""" deposit_id = deposit if isinstance(deposit, int) else deposit.id return [ reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[collection.name, deposit_id]), reverse(PRIVATE_GET_DEPOSIT_METADATA_NC, args=[deposit_id]), ] def update_deposit_with_metadata(authenticated_client, collection, deposit, metadata): # update deposit's metadata response = authenticated_client.post( reverse(SE_IRI, args=[collection.name, deposit.id]), content_type="application/atom+xml;type=entry", data=metadata, HTTP_SLUG=deposit.external_id, HTTP_IN_PROGRESS=True, ) assert response.status_code == status.HTTP_201_CREATED return deposit def test_read_metadata( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """Private metadata read api to existing deposit should return metadata """ deposit = partial_deposit deposit.external_id = "some-external-id" deposit.origin_url = f"https://hal-test.archives-ouvertes.fr/{deposit.external_id}" deposit.save() metadata_xml_atoms = [ atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"] ] metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] for atom_xml in metadata_xml_atoms: deposit = update_deposit_with_metadata( authenticated_client, deposit_collection, deposit, atom_xml, ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK - assert response._headers["content-type"][1] == "application/json" + assert response["content-type"] == "application/json" actual_data = response.json() assert actual_data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, "metadata_raw": metadata_xml_atoms, "metadata_dict": utils.merge(*metadata_xml_raws), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", }, "tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, "deposit": { "author": SWH_PERSON, "committer": SWH_PERSON, "committer_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, "author_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, "client": "test", "id": deposit.id, "collection": "test", "revision_parents": [], }, } def test_read_metadata_revision_with_parent( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """Private read metadata to a deposit (with parent) returns metadata """ deposit = partial_deposit deposit.external_id = "some-external-id" deposit.origin_url = f"https://hal-test.archives-ouvertes.fr/{deposit.external_id}" deposit.save() metadata_xml_atoms = [ atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"] ] metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] for atom_xml in metadata_xml_atoms: deposit = update_deposit_with_metadata( authenticated_client, deposit_collection, deposit, atom_xml, ) rev_id = "da78a9d4cf1d5d29873693fd496142e3a18c20fa" swhid = "swh:1:rev:%s" % rev_id fake_parent = Deposit( swhid=swhid, client=deposit.client, collection=deposit.collection ) fake_parent.save() deposit.parent = fake_parent deposit.save() for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK - assert response._headers["content-type"][1] == "application/json" + assert response["content-type"] == "application/json" actual_data = response.json() assert actual_data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, "metadata_raw": metadata_xml_atoms, "metadata_dict": utils.merge(*metadata_xml_raws), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", }, "tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, "deposit": { "author": SWH_PERSON, "committer": SWH_PERSON, "committer_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, "author_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, "client": "test", "id": deposit.id, "collection": "test", "revision_parents": [rev_id], }, } def test_read_metadata_3( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """date(Created|Published) provided, uses author/committer date """ deposit = partial_deposit deposit.external_id = "hal-01243065" deposit.origin_url = f"https://hal-test.archives-ouvertes.fr/{deposit.external_id}" deposit.save() # add metadata to the deposit with datePublished and dateCreated codemeta_entry_data = ( atom_dataset["metadata"] % """ 2015-04-06T17:08:47+02:00 2017-05-03T16:08:47+02:00 """ ) metadata_xml_atoms = [ atom_dataset["entry-data2"], atom_dataset["entry-data3"], codemeta_entry_data, ] metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] for atom_xml in metadata_xml_atoms: update_deposit_with_metadata( authenticated_client, deposit_collection, deposit, atom_xml, ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK - assert response._headers["content-type"][1] == "application/json" + assert response["content-type"] == "application/json" actual_data = response.json() assert actual_data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/hal-01243065", }, "metadata_raw": metadata_xml_atoms, "metadata_dict": utils.merge(*metadata_xml_raws), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", }, "tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, "deposit": { "author": SWH_PERSON, "committer": SWH_PERSON, "committer_date": { "negative_utc": False, "offset": 120, "timestamp": {"microseconds": 0, "seconds": 1493820527}, }, "author_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1507389428}, }, "client": deposit_collection.name, "id": deposit.id, "collection": deposit_collection.name, "revision_parents": [], }, } def test_read_metadata_4( authenticated_client, deposit_collection, atom_dataset, partial_deposit ): """dateCreated/datePublished not provided, revision uses complete_date """ deposit = partial_deposit codemeta_entry_data = atom_dataset["metadata"] % "" deposit = update_deposit_with_metadata( authenticated_client, deposit_collection, deposit, codemeta_entry_data ) # will use the deposit completed date as fallback date deposit.complete_date = "2016-04-06" deposit.save() for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK - assert response._headers["content-type"][1] == "application/json" + assert response["content-type"] == "application/json" actual_data = response.json() assert actual_data == { "origin": {"type": "deposit", "url": None,}, "metadata_raw": [codemeta_entry_data], "metadata_dict": parse_xml(codemeta_entry_data), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", }, "tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, "deposit": { "author": SWH_PERSON, "committer": SWH_PERSON, "committer_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1459900800}, }, "author_date": { "negative_utc": False, "offset": 0, "timestamp": {"microseconds": 0, "seconds": 1459900800}, }, "client": deposit_collection.name, "id": deposit.id, "collection": deposit_collection.name, "revision_parents": [], }, } def test_read_metadata_5( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): """dateCreated/datePublished provided, revision uses author/committer date If multiple dateCreated provided, the first occurrence (of dateCreated) is selected. If multiple datePublished provided, the first occurrence (of datePublished) is selected. """ deposit = partial_deposit # add metadata to the deposit with multiple datePublished/dateCreated codemeta_entry_data = ( atom_dataset["metadata"] % """ 2015-04-06T17:08:47+02:00 2017-05-03T16:08:47+02:00 2016-04-06T17:08:47+02:00 2018-05-03T16:08:47+02:00 """ ) deposit = update_deposit_with_metadata( authenticated_client, deposit_collection, deposit, codemeta_entry_data ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK - assert response._headers["content-type"][1] == "application/json" + assert response["content-type"] == "application/json" actual_data = response.json() assert actual_data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/hal-01243065", }, "metadata_raw": [codemeta_entry_data], "metadata_dict": parse_xml(codemeta_entry_data), "provider": { "metadata": {}, "provider_name": "", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", }, "tool": { "configuration": {"sword_version": "2"}, "name": "swh-deposit", "version": __version__, }, "deposit": { "author": SWH_PERSON, "committer": SWH_PERSON, "committer_date": { "negative_utc": False, "offset": 120, "timestamp": {"microseconds": 0, "seconds": 1493820527}, }, "author_date": { "negative_utc": False, "offset": 120, "timestamp": {"microseconds": 0, "seconds": 1428332927}, }, "client": deposit_collection.name, "id": deposit.id, "collection": deposit_collection.name, "revision_parents": [], }, } def test_access_to_nonexisting_deposit_returns_404_response( authenticated_client, deposit_collection, ): """Read unknown collection should return a 404 response """ unknown_id = 999 try: Deposit.objects.get(pk=unknown_id) except Deposit.DoesNotExist: assert True for url in private_get_raw_url_endpoints(deposit_collection, unknown_id): response = authenticated_client.get(url) assert response.status_code == status.HTTP_404_NOT_FOUND msg = "Deposit %s does not exist" % unknown_id assert msg in response.content.decode("utf-8")