diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py index 014955b3..bebd1572 100644 --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,963 +1,965 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib -from typing import Any, Tuple +from typing import Sequence, Type from abc import ABCMeta, abstractmethod from django.urls import reverse from django.http import HttpResponse from django.shortcuts import render from django.utils import timezone from rest_framework import status -from rest_framework.authentication import BasicAuthentication -from rest_framework.permissions import IsAuthenticated +from rest_framework.authentication import BaseAuthentication, BasicAuthentication +from rest_framework.permissions import BasePermission, IsAuthenticated from rest_framework.views import APIView from swh.model import hashutil from swh.scheduler.utils import create_oneshot_task_dict from ..config import ( SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI, ARCHIVE_KEY, METADATA_KEY, RAW_METADATA_KEY, STATE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, PRIVATE_CHECK_DEPOSIT, DEPOSIT_STATUS_LOAD_SUCCESS, ARCHIVE_TYPE, METADATA_TYPE, ) from ..errors import ( MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT, CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED, make_error_response_from_dict, FORBIDDEN, NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED, ParserError, PARSING_ERROR, ) from ..models import Deposit, DepositRequest, DepositCollection, DepositClient from ..parsers import parse_xml ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"] ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"] class SWHAPIView(APIView): """Mixin intended as a based API view to enforce the basic authentication check """ - authentication_classes = (BasicAuthentication,) # type: Tuple[Any, ...] - permission_classes = (IsAuthenticated,) + authentication_classes: Sequence[Type[BaseAuthentication]] = (BasicAuthentication,) + permission_classes: Sequence[Type[BasePermission]] = (IsAuthenticated,) class SWHBaseDeposit(SWHDefaultConfig, SWHAPIView, metaclass=ABCMeta): """Base deposit request class sharing multiple common behaviors. """ - def _read_headers(self, req): + def _read_headers(self, request): """Read and unify the necessary headers from the request (those are not stored in the same location or not properly formatted). Args: - req (Request): Input request + request (Request): Input request Returns: Dictionary with the following keys (some associated values may be None): - content-type - content-length - in-progress - content-disposition - packaging - slug - on-behalf-of """ - meta = req._request.META - content_type = req.content_type + meta = request._request.META + content_type = request.content_type content_length = meta.get("CONTENT_LENGTH") if content_length and isinstance(content_length, str): content_length = int(content_length) # final deposit if not provided in_progress = meta.get("HTTP_IN_PROGRESS", False) content_disposition = meta.get("HTTP_CONTENT_DISPOSITION") if isinstance(in_progress, str): in_progress = in_progress.lower() == "true" content_md5sum = meta.get("HTTP_CONTENT_MD5") if content_md5sum: content_md5sum = bytes.fromhex(content_md5sum) packaging = meta.get("HTTP_PACKAGING") slug = meta.get("HTTP_SLUG") on_behalf_of = meta.get("HTTP_ON_BEHALF_OF") metadata_relevant = meta.get("HTTP_METADATA_RELEVANT") return { "content-type": content_type, "content-length": content_length, "in-progress": in_progress, "content-disposition": content_disposition, "content-md5sum": content_md5sum, "packaging": packaging, "slug": slug, "on-behalf-of": on_behalf_of, "metadata-relevant": metadata_relevant, } def _compute_md5(self, filehandler): """Compute uploaded file's md5 sum. Args: filehandler (InMemoryUploadedFile): the file to compute the md5 hash Returns: the md5 checksum (str) """ h = hashlib.md5() for chunk in filehandler: h.update(chunk) return h.digest() - def _deposit_put(self, req, deposit_id=None, in_progress=False, external_id=None): + def _deposit_put( + self, request, deposit_id=None, in_progress=False, external_id=None + ): """Save/Update a deposit in db. Args: deposit_id (int): deposit identifier in_progress (dict): The deposit's status external_id (str): The external identifier to associate to the deposit Returns: The Deposit instance saved or updated. """ if in_progress is False: complete_date = timezone.now() status_type = DEPOSIT_STATUS_DEPOSITED else: complete_date = None status_type = DEPOSIT_STATUS_PARTIAL if not deposit_id: try: # find a deposit parent (same external id, status load # to success) deposit_parent = ( Deposit.objects.filter( external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS ) .order_by("-id")[0:1] .get() ) # noqa except Deposit.DoesNotExist: deposit_parent = None deposit = Deposit( collection=self._collection, external_id=external_id, complete_date=complete_date, status=status_type, client=self._client, parent=deposit_parent, ) else: deposit = Deposit.objects.get(pk=deposit_id) # update metadata deposit.complete_date = complete_date deposit.status = status_type if self.config["checks"]: deposit.save() # needed to have a deposit id args = [deposit.collection.name, deposit.id] scheduler = self.scheduler if deposit.status == DEPOSIT_STATUS_DEPOSITED and not deposit.check_task_id: - check_url = req.build_absolute_uri( + check_url = request.build_absolute_uri( reverse(PRIVATE_CHECK_DEPOSIT, args=args) ) task = create_oneshot_task_dict( "check-deposit", deposit_check_url=check_url ) check_task_id = scheduler.create_tasks([task])[0]["id"] deposit.check_task_id = check_task_id deposit.save() return deposit def _deposit_request_put( self, deposit, deposit_request_data, replace_metadata=False, replace_archives=False, ): """Save a deposit request with metadata attached to a deposit. Args: deposit (Deposit): The deposit concerned by the request deposit_request_data (dict): The dictionary with at most 2 deposit request types (archive, metadata) to associate to the deposit replace_metadata (bool): Flag defining if we add or update existing metadata to the deposit replace_archives (bool): Flag defining if we add or update archives to existing deposit Returns: None """ if replace_metadata: DepositRequest.objects.filter(deposit=deposit, type=METADATA_TYPE).delete() if replace_archives: DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete() deposit_request = None archive_file = deposit_request_data.get(ARCHIVE_KEY) if archive_file: deposit_request = DepositRequest( type=ARCHIVE_TYPE, deposit=deposit, archive=archive_file ) deposit_request.save() metadata = deposit_request_data.get(METADATA_KEY) if metadata: raw_metadata = deposit_request_data.get(RAW_METADATA_KEY) deposit_request = DepositRequest( type=METADATA_TYPE, deposit=deposit, metadata=metadata, raw_metadata=raw_metadata.decode("utf-8"), ) deposit_request.save() assert deposit_request is not None def _delete_archives(self, collection_name, deposit_id): """Delete archives reference from the deposit id. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, "The deposit %s does not exist" % deposit_id ) DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete() return {} def _delete_deposit(self, collection_name, deposit_id): """Delete deposit reference. Args: collection_name (str): Client's name deposit_id (id): The deposit to delete Returns Empty dict when ok. Dict with error key to describe the failure. """ try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, "The deposit %s does not exist" % deposit_id ) if deposit.collection.name != collection_name: summary = "Cannot delete a deposit from another collection" description = "Deposit %s does not belong to the collection %s" % ( deposit_id, collection_name, ) return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description ) DepositRequest.objects.filter(deposit=deposit).delete() deposit.delete() return {} def _check_preconditions_on(self, filehandler, md5sum, content_length=None): """Check preconditions on provided file are respected. That is the length and/or the md5sum hash match the file's content. Args: filehandler (InMemoryUploadedFile): The file to check md5sum (hex str): md5 hash expected from the file's content content_length (int): the expected length if provided. Returns: Either none if no error or a dictionary with a key error detailing the problem. """ if content_length: if content_length > self.config["max_upload_size"]: return make_error_dict( MAX_UPLOAD_SIZE_EXCEEDED, "Upload size limit exceeded (max %s bytes)." % self.config["max_upload_size"], "Please consider sending the archive in " "multiple steps.", ) length = filehandler.size if length != content_length: return make_error_dict( status.HTTP_412_PRECONDITION_FAILED, "Wrong length" ) if md5sum: _md5sum = self._compute_md5(filehandler) if _md5sum != md5sum: return make_error_dict( CHECKSUM_MISMATCH, "Wrong md5 hash", "The checksum sent %s and the actual checksum " "%s does not match." % (hashutil.hash_to_hex(md5sum), hashutil.hash_to_hex(_md5sum)), ) return None def _binary_upload( self, - req, + request, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False, ): """Binary upload routine. Other than such a request, a 415 response is returned. Args: - req (Request): the request holding information to parse + request (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 413 (request entity too large) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided """ content_length = headers["content-length"] if not content_length: return make_error_dict( BAD_REQUEST, "CONTENT_LENGTH header is mandatory", "For archive deposit, the " "CONTENT_LENGTH header must be sent.", ) content_disposition = headers["content-disposition"] if not content_disposition: return make_error_dict( BAD_REQUEST, "CONTENT_DISPOSITION header is mandatory", "For archive deposit, the " "CONTENT_DISPOSITION header must be sent.", ) packaging = headers["packaging"] if packaging and packaging not in ACCEPT_PACKAGINGS: return make_error_dict( BAD_REQUEST, "Only packaging %s is supported" % ACCEPT_PACKAGINGS, "The packaging provided %s is not supported" % packaging, ) - filehandler = req.FILES["file"] + filehandler = request.FILES["file"] precondition_status_response = self._check_preconditions_on( filehandler, headers["content-md5sum"], content_length ) if precondition_status_response: return precondition_status_response external_id = headers["slug"] # actual storage of data archive_metadata = filehandler deposit = self._deposit_put( - req, + request, deposit_id=deposit_id, in_progress=headers["in-progress"], external_id=external_id, ) self._deposit_request_put( deposit, {ARCHIVE_KEY: archive_metadata}, replace_metadata=replace_metadata, replace_archives=replace_archives, ) return { "deposit_id": deposit.id, "deposit_date": deposit.reception_date, "status": deposit.status, "archive": filehandler.name, } def _read_metadata(self, metadata_stream): """Given a metadata stream, reads the metadata and returns both the parsed and the raw metadata. """ raw_metadata = metadata_stream.read() metadata = parse_xml(raw_metadata) return raw_metadata, metadata def _multipart_upload( self, - req, + request, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False, ): """Multipart upload supported with exactly: - 1 archive (zip) - 1 atom entry Other than such a request, a 415 response is returned. Args: - req (Request): the request holding information to parse + request (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id (int): Deposit identifier - deposit_date (date): Deposit date - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 413 (request entity too large) if the length of the archive exceeds the max size configured - 415 (unsupported media type) if a wrong media type is provided """ external_id = headers["slug"] content_types_present = set() data = { "application/zip": None, # expected either zip "application/x-tar": None, # or x-tar "application/atom+xml": None, } - for key, value in req.FILES.items(): + for key, value in request.FILES.items(): fh = value if fh.content_type in content_types_present: return make_error_dict( ERROR_CONTENT, "Only 1 application/zip (or application/x-tar) archive " "and 1 atom+xml entry is supported (as per sword2.0 " "specification)", "You provided more than 1 application/(zip|x-tar) " "or more than 1 application/atom+xml content-disposition " "header in the multipart deposit", ) content_types_present.add(fh.content_type) data[fh.content_type] = fh if len(content_types_present) != 2: return make_error_dict( ERROR_CONTENT, "You must provide both 1 application/zip (or " "application/x-tar) and 1 atom+xml entry for multipart " "deposit", "You need to provide only 1 application/(zip|x-tar) " "and 1 application/atom+xml content-disposition header " "in the multipart deposit", ) filehandler = data["application/zip"] if not filehandler: filehandler = data["application/x-tar"] precondition_status_response = self._check_preconditions_on( filehandler, headers["content-md5sum"] ) if precondition_status_response: return precondition_status_response try: raw_metadata, metadata = self._read_metadata(data["application/atom+xml"]) except ParserError: return make_error_dict( PARSING_ERROR, "Malformed xml metadata", "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.", ) # actual storage of data deposit = self._deposit_put( - req, + request, deposit_id=deposit_id, in_progress=headers["in-progress"], external_id=external_id, ) deposit_request_data = { ARCHIVE_KEY: filehandler, METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata, } self._deposit_request_put( deposit, deposit_request_data, replace_metadata, replace_archives ) return { "deposit_id": deposit.id, "deposit_date": deposit.reception_date, "archive": filehandler.name, "status": deposit.status, } def _atom_entry( self, - req, + request, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False, ): """Atom entry deposit. Args: - req (Request): the request holding information to parse + request (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier if provided replace_metadata (bool): 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. replace_archives (bool): 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. Returns: In the optimal case a dict with the following keys: - deposit_id: deposit id associated to the deposit - deposit_date: date of the deposit - archive: None (no archive is provided here) Otherwise, a dictionary with the key error and the associated failures, either: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ try: - raw_metadata, metadata = self._read_metadata(req.data) + raw_metadata, metadata = self._read_metadata(request.data) except ParserError: return make_error_dict( BAD_REQUEST, "Malformed xml metadata", "The xml received is malformed. " "Please ensure your metadata file is correctly formatted.", ) if not metadata: return make_error_dict( BAD_REQUEST, "Empty body request is not supported", "Atom entry deposit is supposed to send for metadata. " "If the body is empty, there is no metadata.", ) external_id = metadata.get("external_identifier", headers["slug"]) deposit = self._deposit_put( - req, + request, deposit_id=deposit_id, in_progress=headers["in-progress"], external_id=external_id, ) self._deposit_request_put( deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, replace_metadata, replace_archives, ) return { "deposit_id": deposit.id, "deposit_date": deposit.reception_date, "archive": None, "status": deposit.status, } - def _empty_post(self, req, headers, collection_name, deposit_id): + def _empty_post(self, request, headers, collection_name, deposit_id): """Empty post to finalize an empty deposit. Args: - req (Request): the request holding information to parse + request (Request): the request holding information to parse and inject in db headers (dict): request headers formatted collection_name (str): the associated client deposit_id (id): deposit identifier Returns: Dictionary of result with the deposit's id, the date it was completed and no archive. """ deposit = Deposit.objects.get(pk=deposit_id) deposit.complete_date = timezone.now() deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() return { "deposit_id": deposit_id, "deposit_date": deposit.complete_date, "status": deposit.status, "archive": None, } - def _make_iris(self, req, collection_name, deposit_id): + def _make_iris(self, request, collection_name, deposit_id): """Define the IRI endpoints Args: - req (Request): The initial request + request (Request): The initial request collection_name (str): client/collection's name deposit_id (id): Deposit identifier Returns: Dictionary of keys with the iris' urls. """ args = [collection_name, deposit_id] return { - iri: req.build_absolute_uri(reverse(iri, args=args)) + iri: request.build_absolute_uri(reverse(iri, args=args)) for iri in [EM_IRI, EDIT_SE_IRI, CONT_FILE_IRI, STATE_IRI] } - def additional_checks(self, req, headers, collection_name, deposit_id=None): + def additional_checks(self, request, headers, collection_name, deposit_id=None): """Permit the child class to enrich additional checks. Returns: dict with 'error' detailing the problem. """ return {} - def checks(self, req, collection_name, deposit_id=None): + def checks(self, request, collection_name, deposit_id=None): try: self._collection = DepositCollection.objects.get(name=collection_name) except DepositCollection.DoesNotExist: return make_error_dict( NOT_FOUND, "Unknown collection name %s" % collection_name ) - username = req.user.username + username = request.user.username if username: # unauthenticated request can have the username empty try: self._client = DepositClient.objects.get(username=username) except DepositClient.DoesNotExist: return make_error_dict(NOT_FOUND, "Unknown client name %s" % username) if self._collection.id not in self._client.collections: return make_error_dict( FORBIDDEN, "Client %s cannot access collection %s" % (username, collection_name), ) if deposit_id: try: deposit = Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, "Deposit with id %s does not exist" % deposit_id ) - checks = self.restrict_access(req, deposit) + checks = self.restrict_access(request, deposit) if checks: return checks - headers = self._read_headers(req) + headers = self._read_headers(request) if headers["on-behalf-of"]: return make_error_dict(MEDIATION_NOT_ALLOWED, "Mediation is not supported.") - checks = self.additional_checks(req, headers, collection_name, deposit_id) + checks = self.additional_checks(request, headers, collection_name, deposit_id) if "error" in checks: return checks return {"headers": headers} - def restrict_access(self, req, deposit=None): + def restrict_access(self, request, deposit=None): if deposit: - if req.method != "GET" and deposit.status != DEPOSIT_STATUS_PARTIAL: + if request.method != "GET" and deposit.status != DEPOSIT_STATUS_PARTIAL: summary = "You can only act on deposit with status '%s'" % ( DEPOSIT_STATUS_PARTIAL, ) description = "This deposit has status '%s'" % deposit.status return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description ) - def _basic_not_allowed_method(self, req, method): + def _basic_not_allowed_method(self, request, method): return make_error_response( - req, + request, METHOD_NOT_ALLOWED, "%s method is not supported on this endpoint" % method, ) - def get(self, req, *args, **kwargs): - return self._basic_not_allowed_method(req, "GET") + def get(self, request, *args, **kwargs): + return self._basic_not_allowed_method(request, "GET") - def post(self, req, *args, **kwargs): - return self._basic_not_allowed_method(req, "POST") + def post(self, request, *args, **kwargs): + return self._basic_not_allowed_method(request, "POST") - def put(self, req, *args, **kwargs): - return self._basic_not_allowed_method(req, "PUT") + def put(self, request, *args, **kwargs): + return self._basic_not_allowed_method(request, "PUT") - def delete(self, req, *args, **kwargs): - return self._basic_not_allowed_method(req, "DELETE") + def delete(self, request, *args, **kwargs): + return self._basic_not_allowed_method(request, "DELETE") class SWHGetDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support GET method. """ - def get(self, req, collection_name, deposit_id, format=None): + def get(self, request, collection_name, deposit_id, format=None): """Endpoint to create/add resources to deposit. Returns: 200 response when no error during routine occurred 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ - checks = self.checks(req, collection_name, deposit_id) + checks = self.checks(request, collection_name, deposit_id) if "error" in checks: - return make_error_response_from_dict(req, checks["error"]) + return make_error_response_from_dict(request, checks["error"]) - r = self.process_get(req, collection_name, deposit_id) + r = self.process_get(request, collection_name, deposit_id) if isinstance(r, tuple): status, content, content_type = r return HttpResponse(content, status=status, content_type=content_type) return r @abstractmethod - def process_get(self, req, collection_name, deposit_id): + def process_get(self, request, collection_name, deposit_id): """Routine to deal with the deposit's get processing. Returns: Tuple status, stream of content, content-type """ pass class SWHPostDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ - def post(self, req, collection_name, deposit_id=None, format=None): + def post(self, request, collection_name, deposit_id=None, format=None): """Endpoint to create/add resources to deposit. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ - checks = self.checks(req, collection_name, deposit_id) + checks = self.checks(request, collection_name, deposit_id) if "error" in checks: - return make_error_response_from_dict(req, checks["error"]) + return make_error_response_from_dict(request, checks["error"]) headers = checks["headers"] _status, _iri_key, data = self.process_post( - req, headers, collection_name, deposit_id + request, headers, collection_name, deposit_id ) error = data.get("error") if error: - return make_error_response_from_dict(req, error) + return make_error_response_from_dict(request, error) data["packagings"] = ACCEPT_PACKAGINGS - iris = self._make_iris(req, collection_name, data["deposit_id"]) + iris = self._make_iris(request, collection_name, data["deposit_id"]) data.update(iris) response = render( - req, + request, "deposit/deposit_receipt.xml", context=data, content_type="application/xml", status=_status, ) response._headers["location"] = "Location", data[_iri_key] return response @abstractmethod - def process_post(self, req, headers, collection_name, deposit_id=None): + def process_post(self, request, headers, collection_name, deposit_id=None): """Routine to deal with the deposit's processing. Returns Tuple of: - response status code (200, 201, etc...) - key iri (EM_IRI, EDIT_SE_IRI, etc...) - dictionary of the processing result """ pass class SWHPutDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support PUT method. """ - def put(self, req, collection_name, deposit_id, format=None): + def put(self, request, collection_name, deposit_id, format=None): """Endpoint to update deposit resources. Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ - checks = self.checks(req, collection_name, deposit_id) + checks = self.checks(request, collection_name, deposit_id) if "error" in checks: - return make_error_response_from_dict(req, checks["error"]) + return make_error_response_from_dict(request, checks["error"]) headers = checks["headers"] - data = self.process_put(req, headers, collection_name, deposit_id) + data = self.process_put(request, headers, collection_name, deposit_id) error = data.get("error") if error: - return make_error_response_from_dict(req, error) + return make_error_response_from_dict(request, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod - def process_put(self, req, headers, collection_name, deposit_id): + def process_put(self, request, headers, collection_name, deposit_id): """Routine to deal with updating a deposit in some way. Returns dictionary of the processing result """ pass class SWHDeleteDepositAPI(SWHBaseDeposit, metaclass=ABCMeta): """Mixin for class to support DELETE method. """ - def delete(self, req, collection_name, deposit_id): + def delete(self, request, collection_name, deposit_id): """Endpoint to delete some deposit's resources (archives, deposit). Returns: 204 response when no error during routine occurred. 400 if the deposit does not belong to the collection 404 if the deposit or the collection does not exist """ - checks = self.checks(req, collection_name, deposit_id) + checks = self.checks(request, collection_name, deposit_id) if "error" in checks: - return make_error_response_from_dict(req, checks["error"]) + return make_error_response_from_dict(request, checks["error"]) - data = self.process_delete(req, collection_name, deposit_id) + data = self.process_delete(request, collection_name, deposit_id) error = data.get("error") if error: - return make_error_response_from_dict(req, error) + return make_error_response_from_dict(request, error) return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod - def process_delete(self, req, collection_name, deposit_id): + def process_delete(self, request, collection_name, deposit_id): """Routine to delete a resource. This is mostly not allowed except for the EM_IRI (cf. .api.deposit_update.SWHUpdateArchiveDeposit) """ pass diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py index 4b1e1dd3..db3e2f5a 100644 --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -1,97 +1,109 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.deposit import utils from ...config import METADATA_TYPE, SWHDefaultConfig from ...models import DepositRequest, Deposit from rest_framework.permissions import AllowAny from swh.deposit.api.common import SWHAPIView from swh.deposit.errors import make_error_dict, NOT_FOUND class DepositReadMixin: """Deposit Read mixin """ def _deposit_requests(self, deposit, request_type): """Given a deposit, yields its associated deposit_request Args: deposit (Deposit): Deposit to list requests for request_type (str): 'archive' or 'metadata' Yields: deposit requests of type request_type associated to the deposit """ if isinstance(deposit, int): deposit = Deposit.objects.get(pk=deposit) deposit_requests = DepositRequest.objects.filter( type=request_type, deposit=deposit ).order_by("id") for deposit_request in deposit_requests: yield deposit_request def _metadata_get(self, deposit): """Given a deposit, aggregate all metadata requests. Args: deposit (Deposit): The deposit instance to extract metadata from. Returns: metadata dict from the deposit. """ metadata = ( m.metadata for m in self._deposit_requests(deposit, request_type=METADATA_TYPE) ) return utils.merge(*metadata) class SWHPrivateAPIView(SWHDefaultConfig, SWHAPIView): """Mixin intended as private api (so no authentication) based API view (for the private ones). """ authentication_classes = () permission_classes = (AllowAny,) def checks(self, req, collection_name, deposit_id=None): """Override default checks implementation to allow empty collection. """ if deposit_id: try: Deposit.objects.get(pk=deposit_id) except Deposit.DoesNotExist: return make_error_dict( NOT_FOUND, "Deposit with id %s does not exist" % deposit_id ) headers = self._read_headers(req) checks = self.additional_checks(req, headers, collection_name, deposit_id) if "error" in checks: return checks return {"headers": headers} def get( - self, req, collection_name=None, deposit_id=None, format=None, *args, **kwargs + self, + request, + collection_name=None, + deposit_id=None, + format=None, + *args, + **kwargs, ): - return super().get(req, collection_name, deposit_id, format) + return super().get(request, collection_name, deposit_id, format) def put( - self, req, collection_name=None, deposit_id=None, format=None, *args, **kwargs + self, + request, + collection_name=None, + deposit_id=None, + format=None, + *args, + **kwargs, ): - return super().put(req, collection_name, deposit_id, format) + return super().put(request, collection_name, deposit_id, format) diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py index 50cb6231..6dd79e07 100644 --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -1,231 +1,231 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import os import shutil import tempfile from contextlib import contextmanager from django.http import FileResponse from rest_framework import status from swh.core import tarball from swh.model import identifiers from swh.deposit.utils import normalize_date from . import DepositReadMixin, SWHPrivateAPIView from ...config import SWH_PERSON, ARCHIVE_TYPE from ..common import SWHGetDepositAPI from ...models import Deposit @contextmanager def aggregate_tarballs(extraction_dir, archive_paths): """Aggregate multiple tarballs into one and returns this new archive's path. Args: extraction_dir (path): Path to use for the tarballs computation archive_paths ([str]): Deposit's archive paths Returns: Tuple (directory to clean up, archive path (aggregated or not)) """ # rebuild one zip archive from (possibly) multiple ones os.makedirs(extraction_dir, 0o755, exist_ok=True) dir_path = tempfile.mkdtemp(prefix="swh.deposit-", dir=extraction_dir) # root folder to build an aggregated tarball aggregated_tarball_rootdir = os.path.join(dir_path, "aggregate") os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True) # uncompress in a temporary location all archives for archive_path in archive_paths: tarball.uncompress(archive_path, aggregated_tarball_rootdir) # Aggregate into one big tarball the multiple smaller ones temp_tarpath = shutil.make_archive( aggregated_tarball_rootdir, "zip", aggregated_tarball_rootdir ) # can already clean up temporary directory shutil.rmtree(aggregated_tarball_rootdir) try: yield temp_tarpath finally: shutil.rmtree(dir_path) class SWHDepositReadArchives(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ ADDITIONAL_CONFIG = { "extraction_dir": ("str", "/tmp/swh-deposit/archive/"), } def __init__(self): super().__init__() self.extraction_dir = self.config["extraction_dir"] if not os.path.exists(self.extraction_dir): os.makedirs(self.extraction_dir) - def process_get(self, req, collection_name, deposit_id): + def process_get(self, request, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: - req (Request): + request (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ archive_paths = [ r.archive.path for r in self._deposit_requests(deposit_id, request_type=ARCHIVE_TYPE) ] with aggregate_tarballs(self.extraction_dir, archive_paths) as path: return FileResponse( open(path, "rb"), status=status.HTTP_200_OK, content_type="application/zip", ) class SWHDepositReadMetadata(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): """Class in charge of aggregating metadata on a deposit. """ ADDITIONAL_CONFIG = { "provider": ( "dict", { # 'provider_name': '', # those are not set since read from the # 'provider_url': '', # deposit's client "provider_type": "deposit_client", "metadata": {}, }, ), "tool": ( "dict", { "name": "swh-deposit", "version": "0.0.1", "configuration": {"sword_version": "2"}, }, ), } def __init__(self): super().__init__() self.provider = self.config["provider"] self.tool = self.config["tool"] def _normalize_dates(self, deposit, metadata): """Normalize the date to use as a tuple of author date, committer date from the incoming metadata. Args: deposit (Deposit): Deposit model representation metadata (Dict): Metadata dict representation Returns: Tuple of author date, committer date. Those dates are swh normalized. """ commit_date = metadata.get("codemeta:datePublished") author_date = metadata.get("codemeta:dateCreated") if author_date and commit_date: pass elif commit_date: author_date = commit_date elif author_date: commit_date = author_date else: author_date = deposit.complete_date commit_date = deposit.complete_date return (normalize_date(author_date), normalize_date(commit_date)) def metadata_read(self, deposit): """Read and aggregate multiple data on deposit into one unified data dictionary. Args: deposit (Deposit): Deposit concerned by the data aggregation. Returns: Dictionary of data representing the deposit to inject in swh. """ metadata = self._metadata_get(deposit) # Read information metadata data = {"origin": {"type": "deposit", "url": deposit.origin_url,}} # revision fullname = deposit.client.username author_committer = SWH_PERSON # metadata provider self.provider["provider_name"] = deposit.client.last_name self.provider["provider_url"] = deposit.client.provider_url revision_type = "tar" revision_msg = "%s: Deposit %s in collection %s" % ( fullname, deposit.id, deposit.collection.name, ) author_date, commit_date = self._normalize_dates(deposit, metadata) data["revision"] = { "synthetic": True, "date": author_date, "committer_date": commit_date, "author": author_committer, "committer": author_committer, "type": revision_type, "message": revision_msg, "metadata": metadata, } if deposit.parent: swh_persistent_id = deposit.parent.swh_id persistent_identifier = identifiers.parse_persistent_identifier( swh_persistent_id ) parent_revision = persistent_identifier.object_id data["revision"]["parents"] = [parent_revision] data["branch_name"] = "master" data["origin_metadata"] = { "provider": self.provider, "tool": self.tool, "metadata": metadata, } return data - def process_get(self, req, collection_name, deposit_id): + def process_get(self, request, collection_name, deposit_id): deposit = Deposit.objects.get(pk=deposit_id) data = self.metadata_read(deposit) d = {} if data: d = json.dumps(data) return status.HTTP_200_OK, d, "application/json" diff --git a/swh/deposit/api/private/deposit_update_status.py b/swh/deposit/api/private/deposit_update_status.py index 8cb0b234..87d94f70 100644 --- a/swh/deposit/api/private/deposit_update_status.py +++ b/swh/deposit/api/private/deposit_update_status.py @@ -1,82 +1,82 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework.parsers import JSONParser from swh.model.identifiers import persistent_identifier, REVISION, DIRECTORY from . import SWHPrivateAPIView from ..common import SWHPutDepositAPI from ...errors import make_error_dict, BAD_REQUEST from ...models import Deposit, DEPOSIT_STATUS_DETAIL from ...models import DEPOSIT_STATUS_LOAD_SUCCESS class SWHUpdateStatusDeposit(SWHPrivateAPIView, SWHPutDepositAPI): """Deposit request class to update the deposit's status. HTTP verbs supported: PUT """ parser_classes = (JSONParser,) - def additional_checks(self, req, headers, collection_name, deposit_id=None): + def additional_checks(self, request, headers, collection_name, deposit_id=None): """Enrich existing checks to the default ones. New checks: - Ensure the status is provided - Ensure it exists """ - data = req.data + data = request.data status = data.get("status") if not status: msg = "The status key is mandatory with possible values %s" % list( DEPOSIT_STATUS_DETAIL.keys() ) return make_error_dict(BAD_REQUEST, msg) if status not in DEPOSIT_STATUS_DETAIL: msg = "Possible status in %s" % list(DEPOSIT_STATUS_DETAIL.keys()) return make_error_dict(BAD_REQUEST, msg) if status == DEPOSIT_STATUS_LOAD_SUCCESS: swh_id = data.get("revision_id") if not swh_id: msg = "Updating status to %s requires a revision_id key" % (status,) return make_error_dict(BAD_REQUEST, msg) return {} - def process_put(self, req, headers, collection_name, deposit_id): + def process_put(self, request, headers, collection_name, deposit_id): """Update the deposit's status Returns: 204 No content """ deposit = Deposit.objects.get(pk=deposit_id) - deposit.status = req.data["status"] # checks already done before + deposit.status = request.data["status"] # checks already done before - origin_url = req.data.get("origin_url") + origin_url = request.data.get("origin_url") - dir_id = req.data.get("directory_id") + dir_id = request.data.get("directory_id") if dir_id: deposit.swh_id = persistent_identifier(DIRECTORY, dir_id) deposit.swh_id_context = persistent_identifier( DIRECTORY, dir_id, metadata={"origin": origin_url} ) - rev_id = req.data.get("revision_id") + rev_id = request.data.get("revision_id") if rev_id: deposit.swh_anchor_id = persistent_identifier(REVISION, rev_id) deposit.swh_anchor_id_context = persistent_identifier( REVISION, rev_id, metadata={"origin": origin_url} ) deposit.save() return {} diff --git a/tox.ini b/tox.ini index 192894e2..7e81cc5c 100644 --- a/tox.ini +++ b/tox.ini @@ -1,44 +1,45 @@ [tox] envlist=flake8,mypy,py3-django{1,2} [testenv] extras = testing deps = # the dependency below is needed for now as a workaround for # https://github.com/pypa/pip/issues/6239 swh.core[http] >= 0.0.75 dev: ipdb pytest-cov django1: Django>=1.11,<2 django2: Django>=2,<3 commands = pytest \ !dev: --cov {envsitepackagesdir}/swh/deposit --cov-branch \ {envsitepackagesdir}/swh/deposit \ {posargs} [testenv:black] skip_install = true deps = black commands = {envpython} -m black --check swh [testenv:flake8] skip_install = true deps = flake8 commands = {envpython} -m flake8 \ --exclude=.tox,.git,__pycache__,.tox,.eggs,*.egg,swh/deposit/migrations [testenv:mypy] setenv = DJANGO_SETTINGS_MODULE=swh.deposit.settings.testing extras = testing deps = mypy django-stubs + djangorestframework-stubs commands = mypy swh