diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py index 13482de9..d0eea8d1 100644 --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,1201 +1,1162 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from abc import ABCMeta, abstractmethod import datetime import hashlib import json from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union import attr from django.core.files.uploadedfile import InMemoryUploadedFile from django.http import FileResponse, HttpResponse from django.shortcuts import render from django.urls import reverse from django.utils import timezone from rest_framework import status from rest_framework.authentication import BaseAuthentication, BasicAuthentication from rest_framework.permissions import BasePermission, IsAuthenticated from rest_framework.request import Request from rest_framework.views import APIView from swh.deposit.api.checks import check_metadata from swh.deposit.api.converters import convert_status_detail from swh.deposit.models import Deposit from swh.deposit.utils import compute_metadata_context from swh.model import hashutil from swh.model.identifiers import SWHID, ValidationError from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, RawExtrinsicMetadata, ) from swh.scheduler.utils import create_oneshot_task_dict from ..config import ( ARCHIVE_KEY, ARCHIVE_TYPE, CONT_FILE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_PARTIAL, EDIT_IRI, EM_IRI, METADATA_KEY, METADATA_TYPE, RAW_METADATA_KEY, SE_IRI, STATE_IRI, APIConfig, ) from ..errors import ( BAD_REQUEST, CHECKSUM_MISMATCH, ERROR_CONTENT, FORBIDDEN, MAX_UPLOAD_SIZE_EXCEEDED, MEDIATION_NOT_ALLOWED, METHOD_NOT_ALLOWED, NOT_FOUND, PARSING_ERROR, DepositError, ParserError, - make_error_dict, - make_error_response, - make_error_response_from_dict, - make_missing_slug_error, + raise_missing_slug_error, ) from ..models import DepositClient, DepositCollection, DepositRequest from ..parsers import parse_swh_reference, parse_xml ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"] ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"] @attr.s class ParsedRequestHeaders: content_type = attr.ib(type=str) content_length = attr.ib(type=Optional[int]) in_progress = attr.ib(type=bool) content_disposition = attr.ib(type=Optional[str]) content_md5sum = attr.ib(type=Optional[bytes]) packaging = attr.ib(type=Optional[str]) slug = attr.ib(type=Optional[str]) on_behalf_of = attr.ib(type=Optional[str]) metadata_relevant = attr.ib(type=Optional[str]) swhid = attr.ib(type=Optional[str]) def _compute_md5(filehandler: InMemoryUploadedFile) -> bytes: h = hashlib.md5() for chunk in filehandler: h.update(chunk) # type: ignore return h.digest() class AuthenticatedAPIView(APIView): """Mixin intended as a based API view to enforce the basic authentication check """ authentication_classes: Sequence[Type[BaseAuthentication]] = (BasicAuthentication,) permission_classes: Sequence[Type[BasePermission]] = (IsAuthenticated,) class APIBase(APIConfig, AuthenticatedAPIView, metaclass=ABCMeta): """Base deposit request class sharing multiple common behaviors. """ def _read_headers(self, request: Request) -> ParsedRequestHeaders: """Read and unify the necessary headers from the request (those are not stored in the same location or not properly formatted). Args: request (Request): Input request Returns: Dictionary with the following keys (some associated values may be None): - content-type - content-length - in-progress - content-disposition - packaging - slug - on-behalf-of """ meta = request._request.META content_length = meta.get("CONTENT_LENGTH") if content_length and isinstance(content_length, str): content_length = int(content_length) # final deposit if not provided in_progress = meta.get("HTTP_IN_PROGRESS", False) if isinstance(in_progress, str): in_progress = in_progress.lower() == "true" content_md5sum = meta.get("HTTP_CONTENT_MD5") if content_md5sum: content_md5sum = bytes.fromhex(content_md5sum) return ParsedRequestHeaders( content_type=request.content_type, content_length=content_length, in_progress=in_progress, content_disposition=meta.get("HTTP_CONTENT_DISPOSITION"), content_md5sum=content_md5sum, packaging=meta.get("HTTP_PACKAGING"), slug=meta.get("HTTP_SLUG"), on_behalf_of=meta.get("HTTP_ON_BEHALF_OF"), metadata_relevant=meta.get("HTTP_METADATA_RELEVANT"), swhid=meta.get("HTTP_X_CHECK_SWHID"), ) def _deposit_put( self, request: Request, deposit_id: Optional[int] = None, in_progress: bool = False, external_id: Optional[str] = None, ) -> Deposit: """Save/Update a deposit in db. Args: request: request data deposit_id: deposit identifier in_progress: deposit status external_id: external identifier to associate to the deposit Returns: The Deposit instance saved or updated. """ complete_date: Optional[datetime.datetime] = None deposit_parent: Optional[Deposit] = None if in_progress is False: complete_date = timezone.now() status_type = DEPOSIT_STATUS_DEPOSITED else: status_type = DEPOSIT_STATUS_PARTIAL if not deposit_id: try: # find a deposit parent (same external id, status load to success) deposit_parent = ( Deposit.objects.filter( external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS ) .order_by("-id")[0:1] .get() ) except Deposit.DoesNotExist: # then no parent for that deposit, deposit_parent already None pass deposit = Deposit( collection=self._collection, external_id=external_id or "", complete_date=complete_date, status=status_type, client=self._client, parent=deposit_parent, ) else: deposit = Deposit.objects.get(pk=deposit_id) # update metadata deposit.complete_date = complete_date deposit.status = status_type if self.config["checks"]: deposit.save() # needed to have a deposit id scheduler = self.scheduler if deposit.status == DEPOSIT_STATUS_DEPOSITED and not deposit.check_task_id: task = create_oneshot_task_dict( "check-deposit", collection=deposit.collection.name, deposit_id=deposit.id, retries_left=3, ) check_task_id = scheduler.create_tasks([task])[0]["id"] deposit.check_task_id = check_task_id deposit.save() return deposit def _deposit_request_put( self, deposit: Deposit, deposit_request_data: Dict[str, Any], replace_metadata: bool = False, replace_archives: bool = False, ) -> DepositRequest: """Save a deposit request with metadata attached to a deposit. Args: deposit: The deposit concerned by the request deposit_request_data: The dictionary with at most 2 deposit request types (archive, metadata) to associate to the deposit replace_metadata: Flag defining if we add or update existing metadata to the deposit replace_archives: Flag defining if we add or update archives to existing deposit Returns: the DepositRequest object stored in the backend """ if replace_metadata: DepositRequest.objects.filter(deposit=deposit, type=METADATA_TYPE).delete() if replace_archives: DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete() deposit_request = None archive_file = deposit_request_data.get(ARCHIVE_KEY) if archive_file: deposit_request = DepositRequest( type=ARCHIVE_TYPE, deposit=deposit, archive=archive_file ) deposit_request.save() metadata = deposit_request_data.get(METADATA_KEY) if metadata: raw_metadata = deposit_request_data[RAW_METADATA_KEY] deposit_request = DepositRequest( type=METADATA_TYPE, deposit=deposit, metadata=metadata, raw_metadata=raw_metadata.decode("utf-8"), ) deposit_request.save() assert deposit_request is not None return deposit_request def _delete_archives(self, collection_name: str, deposit_id: int) -> Dict: """Delete archive references from the deposit id. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: - return make_error_dict( - NOT_FOUND, f"The deposit {deposit_id} does not exist" - ) + raise DepositError(NOT_FOUND, f"The deposit {deposit_id} does not exist") DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete() return {} def _delete_deposit(self, collection_name: str, deposit_id: int) -> Dict: """Delete deposit reference. Args: collection_name: Client's collection deposit_id: The deposit to delete Returns Empty dict when ok. Dict with error key to describe the failure. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: - return make_error_dict( - NOT_FOUND, f"The deposit {deposit_id} does not exist" - ) + raise DepositError(NOT_FOUND, f"The deposit {deposit_id} does not exist") if deposit.collection.name != collection_name: summary = "Cannot delete a deposit from another collection" description = "Deposit %s does not belong to the collection %s" % ( deposit_id, collection_name, ) - return make_error_dict( + raise DepositError( BAD_REQUEST, summary=summary, verbose_description=description ) DepositRequest.objects.filter(deposit=deposit).delete() deposit.delete() return {} def _check_preconditions_on( self, filehandler, md5sum: Optional[bytes], content_length: Optional[int] = None ) -> Optional[Dict]: """Check preconditions on provided file are respected. That is the length and/or the md5sum hash match the file's content. Args: filehandler (InMemoryUploadedFile): The file to check md5sum: md5 hash expected from the file's content content_length: the expected length if provided. Returns: Either none if no error or a dictionary with a key error detailing the problem. """ max_upload_size = self.config["max_upload_size"] if content_length: if content_length > max_upload_size: - return make_error_dict( + raise DepositError( MAX_UPLOAD_SIZE_EXCEEDED, f"Upload size limit exceeded (max {max_upload_size} bytes)." "Please consider sending the archive in multiple steps.", ) length = filehandler.size if length != content_length: - return make_error_dict( - status.HTTP_412_PRECONDITION_FAILED, "Wrong length" - ) + raise DepositError(status.HTTP_412_PRECONDITION_FAILED, "Wrong length") if md5sum: _md5sum = _compute_md5(filehandler) if _md5sum != md5sum: - return make_error_dict( + raise DepositError( CHECKSUM_MISMATCH, "Wrong md5 hash", f"The checksum sent {hashutil.hash_to_hex(md5sum)} and the actual " f"checksum {hashutil.hash_to_hex(_md5sum)} does not match.", ) return None def _binary_upload( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit_id: Optional[int] = None, replace_metadata: bool = False, replace_archives: bool = False, check_slug_is_present: bool = False, ) -> Dict[str, Any]: """Binary upload routine. Other than such a request, a 415 response is returned. Args: request (Request): the request holding information to parse and inject in db headers (ParsedRequestHeaders): parsed request headers collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. check_slug_is_present: Check for the slug header if True and raise if not present Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 413 (request entity too large) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided """ content_length = headers.content_length if not content_length: - return make_error_dict( + raise DepositError( BAD_REQUEST, "CONTENT_LENGTH header is mandatory", "For archive deposit, the CONTENT_LENGTH header must be sent.", ) content_disposition = headers.content_disposition if not content_disposition: - return make_error_dict( + raise DepositError( BAD_REQUEST, "CONTENT_DISPOSITION header is mandatory", "For archive deposit, the CONTENT_DISPOSITION header must be sent.", ) packaging = headers.packaging if packaging and packaging not in ACCEPT_PACKAGINGS: - return make_error_dict( + raise DepositError( BAD_REQUEST, f"Only packaging {ACCEPT_PACKAGINGS} is supported", f"The packaging provided {packaging} is not supported", ) filehandler = request.FILES["file"] precondition_status_response = self._check_preconditions_on( filehandler, headers.content_md5sum, content_length ) if precondition_status_response: return precondition_status_response slug = headers.slug if check_slug_is_present and not slug: - return make_missing_slug_error() + raise_missing_slug_error() # actual storage of data archive_metadata = filehandler deposit = self._deposit_put( request, deposit_id=deposit_id, in_progress=headers.in_progress, external_id=slug, ) self._deposit_request_put( deposit, {ARCHIVE_KEY: archive_metadata}, replace_metadata=replace_metadata, replace_archives=replace_archives, ) return { "deposit_id": deposit.id, "deposit_date": deposit.reception_date, "status": deposit.status, "archive": filehandler.name, } def _read_metadata(self, metadata_stream) -> Tuple[bytes, Dict[str, Any]]: """Given a metadata stream, reads the metadata and returns both the parsed and the raw metadata. """ raw_metadata = metadata_stream.read() metadata = parse_xml(raw_metadata) return raw_metadata, metadata def _multipart_upload( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit_id: Optional[int] = None, replace_metadata: bool = False, replace_archives: bool = False, check_slug_is_present: bool = False, ) -> Dict: """Multipart upload supported with exactly: - 1 archive (zip) - 1 atom entry Other than such a request, a 415 response is returned. Args: request (Request): the request holding information to parse and inject in db headers: parsed request headers collection_name: the associated client deposit_id: deposit identifier if provided replace_metadata: 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives: 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. check_slug_is_present: Check for the slug header if True and raise if not present Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 413 (request entity too large) if the length of the archive exceeds the max size configured - 415 (unsupported media type) if a wrong media type is provided """ slug = headers.slug if check_slug_is_present and not slug: - return make_missing_slug_error() + raise_missing_slug_error() content_types_present = set() data: Dict[str, Optional[Any]] = { "application/zip": None, # expected either zip "application/x-tar": None, # or x-tar "application/atom+xml": None, } for key, value in request.FILES.items(): fh = value content_type = fh.content_type if content_type in content_types_present: - return make_error_dict( + raise DepositError( ERROR_CONTENT, "Only 1 application/zip (or application/x-tar) archive " "and 1 atom+xml entry is supported (as per sword2.0 " "specification)", "You provided more than 1 application/(zip|x-tar) " "or more than 1 application/atom+xml content-disposition " "header in the multipart deposit", ) content_types_present.add(content_type) assert content_type is not None data[content_type] = fh if len(content_types_present) != 2: - return make_error_dict( + raise DepositError( ERROR_CONTENT, "You must provide both 1 application/zip (or " "application/x-tar) and 1 atom+xml entry for multipart " "deposit", "You need to provide only 1 application/(zip|x-tar) " "and 1 application/atom+xml content-disposition header " "in the multipart deposit", ) filehandler = data["application/zip"] if not filehandler: filehandler = data["application/x-tar"] precondition_status_response = self._check_preconditions_on( filehandler, headers.content_md5sum ) if precondition_status_response: return precondition_status_response try: raw_metadata, metadata = self._read_metadata(data["application/atom+xml"]) except ParserError: - return make_error_dict( + raise DepositError( PARSING_ERROR, "Malformed xml metadata", "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.", ) # actual storage of data deposit = self._deposit_put( request, deposit_id=deposit_id, in_progress=headers.in_progress, external_id=slug, ) deposit_request_data = { ARCHIVE_KEY: filehandler, METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } self._deposit_request_put( deposit, deposit_request_data, replace_metadata, replace_archives ) assert filehandler is not None return { "deposit_id": deposit.id, "deposit_date": deposit.reception_date, "archive": filehandler.name, "status": deposit.status, } def _store_metadata_deposit( self, deposit: Deposit, swhid_reference: Union[str, SWHID], metadata: Dict, raw_metadata: bytes, deposit_origin: Optional[str] = None, ) -> Tuple[Union[SWHID, str], Union[SWHID, str], Deposit, DepositRequest]: """When all user inputs pass the checks, this associates the raw_metadata to the swhid_reference in the raw extrinsic metadata storage. In case of any issues, a bad request response is returned to the user with the details. Checks: - metadata are technically parsable - metadata pass the functional checks - SWHID (if any) is technically valid Args: deposit: Deposit reference swhid_reference: The swhid or the origin to attach metadata information to metadata: Full dict of metadata to check for validity (parsed out of raw_metadata) raw_metadata: The actual raw metadata to send in the storage metadata deposit_origin: Optional deposit origin url to use if any (e.g. deposit update scenario provides one) Raises: DepositError in case of incorrect inputs from the deposit client (e.g. functionally invalid metadata, ...) Returns: Tuple of core swhid, swhid context, deposit and deposit request """ metadata_ok, error_details = check_metadata(metadata) if not metadata_ok: assert error_details, "Details should be set when a failure occurs" raise DepositError( BAD_REQUEST, "Functional metadata checks failure", convert_status_detail(error_details), ) metadata_authority = MetadataAuthority( type=MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit.client.provider_url, metadata={"name": deposit.client.last_name}, ) metadata_fetcher = MetadataFetcher( name=self.tool["name"], version=self.tool["version"], metadata=self.tool["configuration"], ) # replace metadata within the deposit backend deposit_request_data = { METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } # actually add the metadata to the completed deposit deposit_request = self._deposit_request_put(deposit, deposit_request_data) object_type, metadata_context = compute_metadata_context(swhid_reference) if deposit_origin: # metadata deposit update on completed deposit metadata_context["origin"] = deposit_origin swhid_core: Union[str, SWHID] if isinstance(swhid_reference, str): swhid_core = swhid_reference else: swhid_core = attr.evolve(swhid_reference, metadata={}) # store that metadata to the metadata storage metadata_object = RawExtrinsicMetadata( type=object_type, target=swhid_core, # core swhid or origin discovery_date=deposit_request.date, authority=metadata_authority, fetcher=metadata_fetcher, format="sword-v2-atom-codemeta", metadata=raw_metadata, **metadata_context, ) # write to metadata storage self.storage_metadata.metadata_authority_add([metadata_authority]) self.storage_metadata.metadata_fetcher_add([metadata_fetcher]) self.storage_metadata.raw_extrinsic_metadata_add([metadata_object]) return (swhid_core, swhid_reference, deposit, deposit_request) def _atom_entry( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit_id: Optional[int] = None, replace_metadata: bool = False, replace_archives: bool = False, check_slug_is_present: bool = False, ) -> Dict[str, Any]: """Atom entry deposit. Args: request: the request holding information to parse and inject in db headers: parsed request headers collection_name: the associated client deposit_id: deposit identifier if provided replace_metadata: 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives: 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. check_slug_is_present: Check for the slug header if True and raise if not present Returns: In the optimal case a dict with the following keys: - deposit_id: deposit id associated to the deposit - deposit_date: date of the deposit - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ try: raw_metadata, metadata = self._read_metadata(request.data) except ParserError: - return make_error_dict( + raise DepositError( BAD_REQUEST, "Malformed xml metadata", "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.", ) if not metadata: - return make_error_dict( + raise DepositError( BAD_REQUEST, "Empty body request is not supported", "Atom entry deposit is supposed to send for metadata. " "If the body is empty, there is no metadata.", ) # Determine if we are in the metadata-only deposit case try: swhid = parse_swh_reference(metadata) except ValidationError as e: - return make_error_dict(PARSING_ERROR, "Invalid SWHID reference", str(e),) + raise DepositError( + PARSING_ERROR, "Invalid SWHID reference", str(e), + ) if swhid is not None: external_id = metadata.get("external_identifier", headers.slug) else: slug = headers.slug if check_slug_is_present and not slug: - return make_missing_slug_error() + raise_missing_slug_error() external_id = metadata.get("external_identifier", slug) deposit = self._deposit_put( request, deposit_id=deposit_id, in_progress=headers.in_progress, external_id=external_id, ) if swhid is not None: - try: - swhid, swhid_ref, depo, depo_request = self._store_metadata_deposit( - deposit, swhid, metadata, raw_metadata - ) - except DepositError as deposit_error: - return deposit_error.to_dict() + swhid, swhid_ref, depo, depo_request = self._store_metadata_deposit( + deposit, swhid, metadata, raw_metadata + ) deposit.status = DEPOSIT_STATUS_LOAD_SUCCESS if isinstance(swhid_ref, SWHID): deposit.swhid = str(swhid) deposit.swhid_context = str(swhid_ref) deposit.complete_date = depo_request.date deposit.reception_date = depo_request.date deposit.save() return { "deposit_id": deposit.id, "deposit_date": depo_request.date, "status": deposit.status, "archive": None, } self._deposit_request_put( deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, replace_metadata, replace_archives, ) return { "deposit_id": deposit.id, "deposit_date": deposit.reception_date, "archive": None, "status": deposit.status, } def _empty_post( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit_id: int, ) -> Dict[str, Any]: """Empty post to finalize an empty deposit. Args: request: the request holding information to parse and inject in db headers: parsed request headers collection_name: the associated client deposit_id: deposit identifier Returns: Dictionary of result with the deposit's id, the date it was completed and no archive. """ deposit = Deposit.objects.get(pk=deposit_id) deposit.complete_date = timezone.now() deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return { "deposit_id": deposit_id, "deposit_date": deposit.complete_date, "status": deposit.status, "archive": None, } def additional_checks( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit_id: Optional[int] = None, ) -> Dict[str, Any]: """Permit the child class to enrich additional checks. Returns: dict with 'error' detailing the problem. """ return {} def checks( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> Dict[str, Any]: try: self._collection = DepositCollection.objects.get(name=collection_name) except DepositCollection.DoesNotExist: - return make_error_dict( - NOT_FOUND, f"Unknown collection name {collection_name}" - ) + raise DepositError(NOT_FOUND, f"Unknown collection name {collection_name}") assert self._collection is not None username = request.user.username if username: # unauthenticated request can have the username empty try: self._client: DepositClient = DepositClient.objects.get( # type: ignore username=username ) except DepositClient.DoesNotExist: - return make_error_dict(NOT_FOUND, f"Unknown client name {username}") + raise DepositError(NOT_FOUND, f"Unknown client name {username}") collection_id = self._collection.id collections = self._client.collections assert collections is not None if collection_id not in collections: - return make_error_dict( + raise DepositError( FORBIDDEN, f"Client {username} cannot access collection {collection_name}", ) headers = self._read_headers(request) if deposit_id: try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: - return make_error_dict( + raise DepositError( NOT_FOUND, f"Deposit with id {deposit_id} does not exist" ) assert deposit is not None - checks = self.restrict_access(request, headers, deposit) - if checks: - return checks + self.restrict_access(request, headers, deposit) if headers.on_behalf_of: - return make_error_dict(MEDIATION_NOT_ALLOWED, "Mediation is not supported.") + raise DepositError(MEDIATION_NOT_ALLOWED, "Mediation is not supported.") - checks = self.additional_checks(request, headers, collection_name, deposit_id) - if "error" in checks: - return checks + self.additional_checks(request, headers, collection_name, deposit_id) return {"headers": headers} def restrict_access( self, request: Request, headers: ParsedRequestHeaders, deposit: Deposit - ) -> Dict[str, Any]: + ) -> None: """Allow modifications on deposit with status 'partial' only, reject the rest. """ if request.method != "GET" and deposit.status != DEPOSIT_STATUS_PARTIAL: summary = "You can only act on deposit with status '%s'" % ( DEPOSIT_STATUS_PARTIAL, ) description = f"This deposit has status '{deposit.status}'" - return make_error_dict( + raise DepositError( BAD_REQUEST, summary=summary, verbose_description=description ) - return {} def _basic_not_allowed_method(self, request: Request, method: str): - return make_error_response( - request, + raise DepositError( METHOD_NOT_ALLOWED, f"{method} method is not supported on this endpoint", ) def get( self, request: Request, collection_name: str, deposit_id: int ) -> Union[HttpResponse, FileResponse]: return self._basic_not_allowed_method(request, "GET") def post( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: return self._basic_not_allowed_method(request, "POST") def put( self, request: Request, collection_name: str, deposit_id: int ) -> HttpResponse: return self._basic_not_allowed_method(request, "PUT") def delete( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: return self._basic_not_allowed_method(request, "DELETE") class APIGet(APIBase, metaclass=ABCMeta): """Mixin for class to support GET method. """ def get( self, request: Request, collection_name: str, deposit_id: int ) -> Union[HttpResponse, FileResponse]: """Endpoint to create/add resources to deposit. Returns: 200 response when no error during routine occurred 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ - checks = self.checks(request, collection_name, deposit_id) - if "error" in checks: - return make_error_response_from_dict(request, checks["error"]) + self.checks(request, collection_name, deposit_id) r = self.process_get(request, collection_name, deposit_id) status, content, content_type = r if content_type == "swh/generator": with content as path: return FileResponse( open(path, "rb"), status=status, content_type="application/zip" ) if content_type == "application/json": return HttpResponse( json.dumps(content), status=status, content_type=content_type ) return HttpResponse(content, status=status, content_type=content_type) @abstractmethod def process_get( self, request: Request, collection_name: str, deposit_id: int ) -> Tuple[int, Any, str]: """Routine to deal with the deposit's get processing. Returns: Tuple status, stream of content, content-type """ pass class APIPost(APIBase, metaclass=ABCMeta): """Mixin for class to support POST method. """ def post( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: """Endpoint to create/add resources to deposit. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(request, collection_name, deposit_id) - if "error" in checks: - return make_error_response_from_dict(request, checks["error"]) headers = checks["headers"] status, iri_key, data = self.process_post( request, headers, collection_name, deposit_id ) - error = data.get("error") - if error: - return make_error_response_from_dict(request, error) - return self._make_deposit_receipt( request, collection_name, status, iri_key, data, ) def _make_deposit_receipt( self, request, collection_name: str, status: int, iri_key: str, data: Dict[str, Any], ) -> HttpResponse: """Returns an HttpResponse with a SWORD Deposit receipt as content.""" # Build the IRIs in the receipt args = [collection_name, data["deposit_id"]] iris = { iri: request.build_absolute_uri(reverse(iri, args=args)) for iri in [EM_IRI, EDIT_IRI, CONT_FILE_IRI, SE_IRI, STATE_IRI] } data["packagings"] = ACCEPT_PACKAGINGS data.update(iris) response = render( request, "deposit/deposit_receipt.xml", context=data, content_type="application/xml", status=status, ) response._headers["location"] = "Location", data[iri_key] # type: ignore return response @abstractmethod def process_post( self, request, headers: ParsedRequestHeaders, collection_name: str, deposit_id: Optional[int] = None, ) -> Tuple[int, str, Dict]: """Routine to deal with the deposit's processing. Returns Tuple of: - response status code (200, 201, etc...) - key iri (EM_IRI, EDIT_IRI, etc...) - dictionary of the processing result """ pass class APIPut(APIBase, metaclass=ABCMeta): """Mixin for class to support PUT method. """ def put( self, request: Request, collection_name: str, deposit_id: int ) -> HttpResponse: """Endpoint to update deposit resources. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ checks = self.checks(request, collection_name, deposit_id) - if "error" in checks: - return make_error_response_from_dict(request, checks["error"]) - headers = checks["headers"] - data = self.process_put(request, headers, collection_name, deposit_id) - - error = data.get("error") - if error: - return make_error_response_from_dict(request, error) + self.process_put(request, headers, collection_name, deposit_id) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_put( self, request: Request, headers: ParsedRequestHeaders, collection_name: str, deposit_id: int, ) -> Dict[str, Any]: """Routine to deal with updating a deposit in some way. Returns dictionary of the processing result """ pass class APIDelete(APIBase, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ def delete( self, request: Request, collection_name: str, deposit_id: Optional[int] = None ) -> HttpResponse: """Endpoint to delete some deposit's resources (archives, deposit). Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ - checks = self.checks(request, collection_name, deposit_id) - if "error" in checks: - return make_error_response_from_dict(request, checks["error"]) - + self.checks(request, collection_name, deposit_id) assert deposit_id is not None - data = self.process_delete(request, collection_name, deposit_id) - error = data.get("error") - if error: - return make_error_response_from_dict(request, error) + self.process_delete(request, collection_name, deposit_id) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod def process_delete( self, request: Request, collection_name: str, deposit_id: int ) -> Dict: """Routine to delete a resource. This is mostly not allowed except for the EM_IRI (cf. .api.deposit_update.APIUpdateArchive) """ return {} diff --git a/swh/deposit/api/content.py b/swh/deposit/api/content.py index b34abc7e..f0a2bd8b 100644 --- a/swh/deposit/api/content.py +++ b/swh/deposit/api/content.py @@ -1,55 +1,53 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.http import HttpResponse from django.shortcuts import render from rest_framework import status -from ..errors import NOT_FOUND, make_error_response, make_error_response_from_dict +from ..errors import NOT_FOUND, make_error_response from ..models import DEPOSIT_STATUS_DETAIL, Deposit, DepositRequest from .common import APIBase class ContentAPI(APIBase): """Deposit request class defining api endpoints for sword deposit. What's known as 'Cont-IRI' and 'File-IRI' in the sword specification. HTTP verbs supported: GET """ def get(self, req, collection_name: str, deposit_id: int) -> HttpResponse: - checks = self.checks(req, collection_name, deposit_id) - if "error" in checks: - return make_error_response_from_dict(req, checks["error"]) + self.checks(req, collection_name, deposit_id) try: deposit = Deposit.objects.get(pk=deposit_id) if deposit.collection.name != collection_name: raise Deposit.DoesNotExist except Deposit.DoesNotExist: return make_error_response( req, NOT_FOUND, "deposit %s does not belong to collection %s" % (deposit_id, collection_name), ) requests = DepositRequest.objects.filter(deposit=deposit) context = { "deposit_id": deposit.id, "status": deposit.status, "status_detail": DEPOSIT_STATUS_DETAIL[deposit.status], "requests": requests, } return render( req, "deposit/content.xml", context=context, content_type="application/xml", status=status.HTTP_200_OK, ) diff --git a/swh/deposit/api/edit.py b/swh/deposit/api/edit.py index b6eaf27a..fb341272 100644 --- a/swh/deposit/api/edit.py +++ b/swh/deposit/api/edit.py @@ -1,152 +1,149 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Any, Dict from rest_framework.request import Request from swh.deposit.models import Deposit from swh.model.identifiers import parse_swhid from ..config import DEPOSIT_STATUS_LOAD_SUCCESS -from ..errors import BAD_REQUEST, DepositError, ParserError, make_error_dict +from ..errors import BAD_REQUEST, DepositError, ParserError from ..parsers import SWHAtomEntryParser, SWHMultiPartParser from .common import APIDelete, APIPut, ParsedRequestHeaders class EditAPI(APIPut, APIDelete): """Deposit request class defining api endpoints for sword deposit. What's known as 'Edit-IRI' in the sword specification. HTTP verbs supported: PUT, DELETE """ parser_classes = (SWHMultiPartParser, SWHAtomEntryParser) def restrict_access( self, request: Request, headers: ParsedRequestHeaders, deposit: Deposit - ) -> Dict[str, Any]: + ) -> None: """Relax restriction access to allow metadata update on deposit with status "done" when a swhid is provided. """ if ( request.method == "PUT" and headers.swhid is not None and deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS ): # Allow metadata update on deposit with status "done" when swhid provided - return {} + return # otherwise, let the standard access restriction check occur - return super().restrict_access(request, headers, deposit) + super().restrict_access(request, headers, deposit) def process_put( self, request, headers: ParsedRequestHeaders, collection_name: str, deposit_id: int, ) -> Dict[str, Any]: """This allows the following scenarios: - multipart: replace all the deposit (status partial) metadata and archive with the provided ones. - atom: replace all the deposit (status partial) metadata with the provided ones. - with swhid, atom: Add new metatada to deposit (status done) with provided ones and push such metadata to the metadata storage directly. source: - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_metadata - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_multipart Raises: 400 if any of the following occur: - the swhid provided and the deposit swhid do not match - the provided metadata xml file is malformed - the provided xml atom entry is empty - the provided swhid does not exist in the archive Returns: 204 No content """ # noqa swhid = headers.swhid if swhid is None: if request.content_type.startswith("multipart/"): return self._multipart_upload( request, headers, collection_name, deposit_id=deposit_id, replace_archives=True, replace_metadata=True, ) # standard metadata update (replace all metadata already provided to the # deposit by the new ones) return self._atom_entry( request, headers, collection_name, deposit_id=deposit_id, replace_metadata=True, ) # Update metadata on a deposit already ingested # Write to the metadata storage (and the deposit backend) # no ingestion triggered deposit = Deposit.objects.get(pk=deposit_id) assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS if swhid != deposit.swhid: - return make_error_dict( + raise DepositError( BAD_REQUEST, f"Mismatched provided SWHID {swhid} with deposit's {deposit.swhid}.", "The provided SWHID does not match the deposit to update. " "Please ensure you send the correct deposit SWHID.", ) try: raw_metadata, metadata = self._read_metadata(request.data) except ParserError: - return make_error_dict( + raise DepositError( BAD_REQUEST, "Malformed xml metadata", "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.", ) if not metadata: - return make_error_dict( + raise DepositError( BAD_REQUEST, "Empty body request is not supported", "Atom entry deposit is supposed to send for metadata. " "If the body is empty, there is no metadata.", ) - try: - _, _, deposit, deposit_request = self._store_metadata_deposit( - deposit, parse_swhid(swhid), metadata, raw_metadata, deposit.origin_url, - ) - except DepositError as deposit_error: - return deposit_error.to_dict() + _, _, deposit, deposit_request = self._store_metadata_deposit( + deposit, parse_swhid(swhid), metadata, raw_metadata, deposit.origin_url, + ) return { "deposit_id": deposit.id, "deposit_date": deposit_request.date, "status": deposit.status, "archive": None, } def process_delete(self, req, collection_name: str, deposit_id: int) -> Dict: """Delete the container (deposit). source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_deleteconteiner # noqa """ return self._delete_deposit(collection_name, deposit_id) diff --git a/swh/deposit/api/edit_media.py b/swh/deposit/api/edit_media.py index 2848164d..b6da9f7e 100644 --- a/swh/deposit/api/edit_media.py +++ b/swh/deposit/api/edit_media.py @@ -1,97 +1,96 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Any, Dict, Optional, Tuple from rest_framework import status from ..config import CONT_FILE_IRI -from ..errors import BAD_REQUEST, make_error_dict +from ..errors import BAD_REQUEST, DepositError from ..parsers import SWHFileUploadTarParser, SWHFileUploadZipParser from .common import ( ACCEPT_ARCHIVE_CONTENT_TYPES, APIDelete, APIPost, APIPut, ParsedRequestHeaders, ) class EditMediaAPI(APIPost, APIPut, APIDelete): """Deposit request class defining api endpoints for sword deposit. What's known as 'EM IRI' in the sword specification. HTTP verbs supported: PUT, POST, DELETE """ parser_classes = ( SWHFileUploadZipParser, SWHFileUploadTarParser, ) def process_put( self, req, headers: ParsedRequestHeaders, collection_name: str, deposit_id: int ) -> Dict[str, Any]: """Replace existing content for the existing deposit. source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_binary # noqa Returns: 204 No content """ if req.content_type not in ACCEPT_ARCHIVE_CONTENT_TYPES: msg = "Packaging format supported is restricted to %s" % ( ", ".join(ACCEPT_ARCHIVE_CONTENT_TYPES) ) - return make_error_dict(BAD_REQUEST, msg) + raise DepositError(BAD_REQUEST, msg) return self._binary_upload( req, headers, collection_name, deposit_id=deposit_id, replace_archives=True ) def process_post( self, req, headers: ParsedRequestHeaders, collection_name: str, deposit_id: Optional[int] = None, ) -> Tuple[int, str, Dict]: """Add new content to the existing deposit. source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_mediaresource # noqa Returns: 201 Created Headers: Location: [Cont-File-IRI] Body: [optional Deposit Receipt] """ if req.content_type not in ACCEPT_ARCHIVE_CONTENT_TYPES: msg = "Packaging format supported is restricted to %s" % ( ", ".join(ACCEPT_ARCHIVE_CONTENT_TYPES) ) - unused = 0 - return unused, "unused", make_error_dict(BAD_REQUEST, msg) + raise DepositError(BAD_REQUEST, msg) return ( status.HTTP_201_CREATED, CONT_FILE_IRI, self._binary_upload(req, headers, collection_name, deposit_id), ) def process_delete(self, req, collection_name: str, deposit_id: int) -> Dict: """Delete content (archives) from existing deposit. source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_deletingcontent # noqa Returns: 204 Created """ return self._delete_archives(collection_name, deposit_id) diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py index 954a3dfe..bab6256c 100644 --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -1,103 +1,101 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Any, Dict, List, Tuple from rest_framework.permissions import AllowAny from swh.deposit import utils from swh.deposit.api.common import AuthenticatedAPIView -from swh.deposit.errors import NOT_FOUND, make_error_dict +from swh.deposit.errors import NOT_FOUND, DepositError from ...config import METADATA_TYPE, APIConfig from ...models import Deposit, DepositRequest class DepositReadMixin: """Deposit Read mixin """ def _deposit_requests(self, deposit, request_type): """Given a deposit, yields its associated deposit_request Args: deposit (Deposit): Deposit to list requests for request_type (str): 'archive' or 'metadata' Yields: deposit requests of type request_type associated to the deposit """ if isinstance(deposit, int): deposit = Deposit.objects.get(pk=deposit) deposit_requests = DepositRequest.objects.filter( type=request_type, deposit=deposit ).order_by("id") for deposit_request in deposit_requests: yield deposit_request def _metadata_get(self, deposit: Deposit) -> Tuple[Dict[str, Any], List[str]]: """Given a deposit, retrieve all metadata requests into one Dict and returns both that aggregated metadata dict and the list of raw_metdadata. Args: deposit: The deposit instance to extract metadata from Returns: Tuple of aggregated metadata dict, list of raw_metadata """ metadata: List[Dict[str, Any]] = [] raw_metadata: List[str] = [] for deposit_request in self._deposit_requests( deposit, request_type=METADATA_TYPE ): metadata.append(deposit_request.metadata) raw_metadata.append(deposit_request.raw_metadata) aggregated_metadata = utils.merge(*metadata) return (aggregated_metadata, raw_metadata) class APIPrivateView(APIConfig, AuthenticatedAPIView): """Mixin intended as private api (so no authentication) based API view (for the private ones). """ authentication_classes = () permission_classes = (AllowAny,) def checks(self, req, collection_name, deposit_id=None): """Override default checks implementation to allow empty collection. """ if deposit_id: try: Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: - return make_error_dict( + raise DepositError( NOT_FOUND, "Deposit with id %s does not exist" % deposit_id ) headers = self._read_headers(req) - checks = self.additional_checks(req, headers, collection_name, deposit_id) - if "error" in checks: - return checks + self.additional_checks(req, headers, collection_name, deposit_id) return {"headers": headers} def get( self, request, collection_name=None, deposit_id=None, *args, **kwargs, ): return super().get(request, collection_name, deposit_id) def put( self, request, collection_name=None, deposit_id=None, *args, **kwargs, ): return super().put(request, collection_name, deposit_id) diff --git a/swh/deposit/api/private/deposit_update_status.py b/swh/deposit/api/private/deposit_update_status.py index 78639805..6b0974f1 100644 --- a/swh/deposit/api/private/deposit_update_status.py +++ b/swh/deposit/api/private/deposit_update_status.py @@ -1,113 +1,113 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Dict from rest_framework.parsers import JSONParser from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, swhid from . import APIPrivateView -from ...errors import BAD_REQUEST, make_error_dict +from ...errors import BAD_REQUEST, DepositError from ...models import DEPOSIT_STATUS_DETAIL, DEPOSIT_STATUS_LOAD_SUCCESS, Deposit from ..common import APIPut, ParsedRequestHeaders MANDATORY_KEYS = ["origin_url", "revision_id", "directory_id", "snapshot_id"] class APIUpdateStatus(APIPrivateView, APIPut): """Deposit request class to update the deposit's status. HTTP verbs supported: PUT """ parser_classes = (JSONParser,) def additional_checks( self, request, headers: ParsedRequestHeaders, collection_name, deposit_id=None ): """Enrich existing checks to the default ones. New checks: - Ensure the status is provided - Ensure it exists - no missing information on load success update """ data = request.data status = data.get("status") if not status: msg = "The status key is mandatory with possible values %s" % list( DEPOSIT_STATUS_DETAIL.keys() ) - return make_error_dict(BAD_REQUEST, msg) + raise DepositError(BAD_REQUEST, msg) if status not in DEPOSIT_STATUS_DETAIL: msg = "Possible status in %s" % list(DEPOSIT_STATUS_DETAIL.keys()) - return make_error_dict(BAD_REQUEST, msg) + raise DepositError(BAD_REQUEST, msg) if status == DEPOSIT_STATUS_LOAD_SUCCESS: missing_keys = [] for key in MANDATORY_KEYS: value = data.get(key) if value is None: missing_keys.append(key) if missing_keys: msg = ( f"Updating deposit status to {status}" f" requires information {','.join(missing_keys)}" ) - return make_error_dict(BAD_REQUEST, msg) + raise DepositError(BAD_REQUEST, msg) return {} def process_put( self, request, headers: ParsedRequestHeaders, collection_name: str, deposit_id: int, ) -> Dict: """Update the deposit with status and SWHIDs Returns: 204 No content 400 Bad request if checks fail """ data = request.data deposit = Deposit.objects.get(pk=deposit_id) status = data["status"] deposit.status = status if status == DEPOSIT_STATUS_LOAD_SUCCESS: origin_url = data["origin_url"] directory_id = data["directory_id"] revision_id = data["revision_id"] dir_id = swhid(DIRECTORY, directory_id) snp_id = swhid(SNAPSHOT, data["snapshot_id"]) rev_id = swhid(REVISION, revision_id) deposit.swhid = dir_id # new id with contextual information deposit.swhid_context = swhid( DIRECTORY, directory_id, metadata={ "origin": origin_url, "visit": snp_id, "anchor": rev_id, "path": "/", }, ) else: # rejected deposit.status = status deposit.save() return {} diff --git a/swh/deposit/api/state.py b/swh/deposit/api/state.py index bb617cc3..8a6005b7 100644 --- a/swh/deposit/api/state.py +++ b/swh/deposit/api/state.py @@ -1,65 +1,63 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.http import HttpResponse from django.shortcuts import render from rest_framework import status -from ..errors import NOT_FOUND, make_error_response, make_error_response_from_dict +from ..errors import NOT_FOUND, make_error_response from ..models import DEPOSIT_STATUS_DETAIL, Deposit from .common import APIBase from .converters import convert_status_detail class StateAPI(APIBase): """Deposit status. What's known as 'State-IRI' in the sword specification. HTTP verbs supported: GET """ def get(self, req, collection_name: str, deposit_id: int) -> HttpResponse: - checks = self.checks(req, collection_name, deposit_id) - if "error" in checks: - return make_error_response_from_dict(req, checks["error"]) + self.checks(req, collection_name, deposit_id) try: deposit = Deposit.objects.get(pk=deposit_id) if deposit.collection.name != collection_name: raise Deposit.DoesNotExist except Deposit.DoesNotExist: return make_error_response( req, NOT_FOUND, "deposit %s does not belong to collection %s" % (deposit_id, collection_name), ) status_detail = convert_status_detail(deposit.status_detail) if not status_detail: status_detail = DEPOSIT_STATUS_DETAIL[deposit.status] context = { "deposit_id": deposit.id, "status_detail": status_detail, } keys = ( "status", "swhid", "swhid_context", "external_id", ) for k in keys: context[k] = getattr(deposit, k, None) return render( req, "deposit/status.xml", context=context, content_type="application/xml", status=status.HTTP_200_OK, ) diff --git a/swh/deposit/errors.py b/swh/deposit/errors.py index ff89a067..b11ec751 100644 --- a/swh/deposit/errors.py +++ b/swh/deposit/errors.py @@ -1,180 +1,201 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of providing the standard sword errors """ -from typing import Any, Dict +from typing import NoReturn from django.shortcuts import render from rest_framework import status FORBIDDEN = "forbidden" UNAUTHORIZED = "unauthorized" NOT_FOUND = "unknown" BAD_REQUEST = "bad-request" ERROR_CONTENT = "error-content" CHECKSUM_MISMATCH = "checksum-mismatch" MEDIATION_NOT_ALLOWED = "mediation-not-allowed" METHOD_NOT_ALLOWED = "method-not-allowed" MAX_UPLOAD_SIZE_EXCEEDED = "max_upload_size_exceeded" PARSING_ERROR = "parsing-error" class ParserError(ValueError): """Specific parsing error detected when parsing the xml metadata input """ pass ERRORS = { FORBIDDEN: { "status": status.HTTP_403_FORBIDDEN, "iri": "http://purl.org/net/sword/error/ErrorForbidden", "tag": "sword:ErrorForbidden", }, UNAUTHORIZED: { "status": status.HTTP_401_UNAUTHORIZED, "iri": "http://purl.org/net/sword/error/ErrorUnauthorized", "tag": "sword:ErrorUnauthorized", }, NOT_FOUND: { "status": status.HTTP_404_NOT_FOUND, "iri": "http://purl.org/net/sword/error/ErrorNotFound", "tag": "sword:ErrorNotFound", }, ERROR_CONTENT: { "status": status.HTTP_415_UNSUPPORTED_MEDIA_TYPE, "iri": "http://purl.org/net/sword/error/ErrorContent", "tag": "sword:ErrorContent", }, CHECKSUM_MISMATCH: { "status": status.HTTP_412_PRECONDITION_FAILED, "iri": "http://purl.org/net/sword/error/ErrorChecksumMismatch", "tag": "sword:ErrorChecksumMismatch", }, BAD_REQUEST: { "status": status.HTTP_400_BAD_REQUEST, "iri": "http://purl.org/net/sword/error/ErrorBadRequest", "tag": "sword:ErrorBadRequest", }, PARSING_ERROR: { "status": status.HTTP_400_BAD_REQUEST, "iri": "http://purl.org/net/sword/error/ErrorBadRequest", "tag": "sword:ErrorBadRequest", }, MEDIATION_NOT_ALLOWED: { "status": status.HTTP_412_PRECONDITION_FAILED, "iri": "http://purl.org/net/sword/error/MediationNotAllowed", "tag": "sword:MediationNotAllowed", }, METHOD_NOT_ALLOWED: { "status": status.HTTP_405_METHOD_NOT_ALLOWED, "iri": "http://purl.org/net/sword/error/MethodNotAllowed", "tag": "sword:MethodNotAllowed", }, MAX_UPLOAD_SIZE_EXCEEDED: { "status": status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, "iri": "http://purl.org/net/sword/error/MaxUploadSizeExceeded", "tag": "sword:MaxUploadSizeExceeded", }, } def make_error_dict(key, summary=None, verbose_description=None): """Utility function to factorize error message dictionary. Args: key (str): Error status key referenced in swh.deposit.errors module summary (str/None): Error message clarifying the status verbose_description (str/None): A more verbose description or work around a potential problem. Returns: Dictionary with key 'error' detailing the 'status' and associated 'message' """ return { "error": { "key": key, "summary": summary, "verboseDescription": verbose_description, }, } def make_error_response_from_dict(req, error): """Utility function to return an http response with error detail. Args: req (Request): original request error (dict): Error described as dict, typically generated from the make_error_dict function. Returns: HttpResponse with detailed error. """ error_information = ERRORS[error["key"]] context = error context.update(error_information) return render( req, "deposit/error.xml", context=error, content_type="application/xml", status=error_information["status"], ) def make_error_response(req, key, summary=None, verbose_description=None): """Utility function to create an http response with detailed error. Args: req (Request): original request key (str): Error status key referenced in swh.deposit.errors module summary (str): Error message clarifying the status verbose_description (str / None): A more verbose description or work around a potential problem. Returns: Dictionary with key 'error' detailing the 'status' and associated 'message' """ error = make_error_dict(key, summary, verbose_description) return make_error_response_from_dict(req, error["error"]) -def make_missing_slug_error() -> Dict[str, Any]: +def raise_missing_slug_error() -> NoReturn: """Returns a missing slug header error dict """ - return make_error_dict( + raise DepositError( BAD_REQUEST, "Missing SLUG header", verbose_description=( "Provide in the SLUG header one identifier, for example the " "url pointing to the resource you are depositing." ), ) class DepositError(ValueError): """Represents an error that should be reported to the client """ - def __init__(self, key, summary, verbose_description): + def __init__(self, key, summary, verbose_description=None): self.key = key self.summary = summary self.verbose_description = verbose_description def to_dict(self): return make_error_dict(self.key, self.summary, self.verbose_description) + + +class DepositErrorMiddleware: + """A Django middleware that catches DepositError and returns a proper + error response.""" + + # __init__ and __call__ are boilerplate to make a pass-through Django + # middleware + + def __init__(self, get_response): + self.get_response = get_response + + def __call__(self, request): + response = self.get_response(request) + return response + + def process_exception(self, request, exception): + if isinstance(exception, DepositError): + return make_error_response_from_dict(request, exception.to_dict()["error"]) + else: + return None diff --git a/swh/deposit/settings/common.py b/swh/deposit/settings/common.py index 659651e0..749f7f6b 100644 --- a/swh/deposit/settings/common.py +++ b/swh/deposit/settings/common.py @@ -1,114 +1,115 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ Django settings for swh project. Generated by 'django-admin startproject' using Django 1.10.7. For more information on this file, see https://docs.djangoproject.com/en/1.10/topics/settings/ For the full list of settings and their values, see https://docs.djangoproject.com/en/1.10/ref/settings/ """ import os # Build paths inside the project like this: os.path.join(BASE_DIR, ...) BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/ ALLOWED_HOSTS = ["127.0.0.1", "localhost"] # Application definition INSTALLED_APPS = [ "django.contrib.auth", "django.contrib.contenttypes", "django.contrib.staticfiles", "django.contrib.sessions", "django.contrib.messages", "django.contrib.postgres", # for JSONField, ArrayField "swh.deposit.apps.DepositConfig", ] MIDDLEWARE = [ "django.middleware.security.SecurityMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", "django.middleware.common.CommonMiddleware", "django.middleware.csrf.CsrfViewMiddleware", "django.contrib.auth.middleware.AuthenticationMiddleware", "django.contrib.messages.middleware.MessageMiddleware", "django.middleware.clickjacking.XFrameOptionsMiddleware", "swh.deposit.auth.WrapBasicAuthenticationResponseMiddleware", + "swh.deposit.errors.DepositErrorMiddleware", ] ROOT_URLCONF = "swh.deposit.urls" TEMPLATES = [ { "BACKEND": "django.template.backends.django.DjangoTemplates", "DIRS": [], "APP_DIRS": True, "OPTIONS": { "context_processors": [ "django.template.context_processors.debug", "django.template.context_processors.request", "django.contrib.auth.context_processors.auth", "django.contrib.messages.context_processors.messages", ], }, }, ] # Password validation # https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators AUTH_PASSWORD_VALIDATORS = [ { "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", # noqa }, {"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",}, {"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",}, {"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",}, ] # Internationalization # https://docs.djangoproject.com/en/1.10/topics/i18n/ LANGUAGE_CODE = "en-us" TIME_ZONE = "UTC" USE_I18N = True USE_L10N = True USE_TZ = True # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/1.10/howto/static-files/ STATIC_URL = "/static/" REST_FRAMEWORK = { "DEFAULT_AUTHENTICATION_CLASSES": ( "rest_framework.authentication.BasicAuthentication", ), "EXCEPTION_HANDLER": "swh.deposit.exception.custom_exception_handler", } FILE_UPLOAD_HANDLERS = [ "django.core.files.uploadhandler.MemoryFileUploadHandler", "django.core.files.uploadhandler.TemporaryFileUploadHandler", ]