diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -4,16 +4,19 @@ # See top-level LICENSE file for more information from abc import ABCMeta, abstractmethod +import datetime import hashlib -from typing import Sequence, Type +import json +from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union -from django.http import HttpResponse +from django.http import FileResponse, HttpResponse from django.shortcuts import render from django.urls import reverse from django.utils import timezone from rest_framework import status from rest_framework.authentication import BaseAuthentication, BasicAuthentication from rest_framework.permissions import BasePermission, IsAuthenticated +from rest_framework.request import Request from rest_framework.views import APIView from swh.model import hashutil @@ -71,7 +74,7 @@ """ - def _read_headers(self, request): + def _read_headers(self, request: Request) -> Dict[str, Any]: """Read and unify the necessary headers from the request (those are not stored in the same location or not properly formatted). @@ -123,7 +126,7 @@ "metadata-relevant": metadata_relevant, } - def _compute_md5(self, filehandler): + def _compute_md5(self, filehandler) -> bytes: """Compute uploaded file's md5 sum. Args: @@ -140,31 +143,36 @@ return h.digest() def _deposit_put( - self, request, deposit_id=None, in_progress=False, external_id=None - ): + self, + request: Request, + deposit_id: Optional[int] = None, + in_progress: bool = False, + external_id: Optional[str] = None, + ) -> Deposit: """Save/Update a deposit in db. Args: - deposit_id (int): deposit identifier - in_progress (dict): The deposit's status - external_id (str): The external identifier to associate to - the deposit + request: request data + deposit_id: deposit identifier + in_progress: deposit status + external_id: external identifier to associate to the deposit Returns: The Deposit instance saved or updated. """ + complete_date: Optional[datetime.datetime] = None + deposit_parent: Optional[Deposit] = None + if in_progress is False: complete_date = timezone.now() status_type = DEPOSIT_STATUS_DEPOSITED else: - complete_date = None status_type = DEPOSIT_STATUS_PARTIAL if not deposit_id: try: - # find a deposit parent (same external id, status load - # to success) + # find a deposit parent (same external id, status load to success) deposit_parent = ( Deposit.objects.filter( external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS @@ -173,8 +181,10 @@ .get() ) # noqa except Deposit.DoesNotExist: - deposit_parent = None + # then no parent for that deposit, deposit_parent already None + pass + assert external_id is not None deposit = Deposit( collection=self._collection, external_id=external_id, @@ -208,20 +218,20 @@ def _deposit_request_put( self, - deposit, - deposit_request_data, - replace_metadata=False, - replace_archives=False, - ): + deposit: Deposit, + deposit_request_data: Dict[str, Any], + replace_metadata: bool = False, + replace_archives: bool = False, + ) -> None: """Save a deposit request with metadata attached to a deposit. Args: - deposit (Deposit): The deposit concerned by the request - deposit_request_data (dict): The dictionary with at most 2 deposit - request types (archive, metadata) to associate to the deposit - replace_metadata (bool): Flag defining if we add or update + deposit: The deposit concerned by the request + deposit_request_data: The dictionary with at most 2 deposit + request types (archive, metadata) to associate to the deposit + replace_metadata: Flag defining if we add or update existing metadata to the deposit - replace_archives (bool): Flag defining if we add or update + replace_archives: Flag defining if we add or update archives to existing deposit Returns: @@ -245,7 +255,7 @@ metadata = deposit_request_data.get(METADATA_KEY) if metadata: - raw_metadata = deposit_request_data.get(RAW_METADATA_KEY) + raw_metadata = deposit_request_data[RAW_METADATA_KEY] deposit_request = DepositRequest( type=METADATA_TYPE, deposit=deposit, @@ -256,8 +266,8 @@ assert deposit_request is not None - def _delete_archives(self, collection_name, deposit_id): - """Delete archives reference from the deposit id. + def _delete_archives(self, collection_name: str, deposit_id: int) -> Dict: + """Delete archive references from the deposit id. """ try: @@ -270,12 +280,12 @@ return {} - def _delete_deposit(self, collection_name, deposit_id): + def _delete_deposit(self, collection_name: str, deposit_id: int) -> Dict: """Delete deposit reference. Args: - collection_name (str): Client's name - deposit_id (id): The deposit to delete + collection_name: Client's collection + deposit_id: The deposit to delete Returns Empty dict when ok. @@ -304,14 +314,16 @@ return {} - def _check_preconditions_on(self, filehandler, md5sum, content_length=None): + def _check_preconditions_on( + self, filehandler, md5sum: str, content_length: Optional[int] = None + ) -> Optional[Dict]: """Check preconditions on provided file are respected. That is the length and/or the md5sum hash match the file's content. Args: filehandler (InMemoryUploadedFile): The file to check - md5sum (hex str): md5 hash expected from the file's content - content_length (int): the expected length if provided. + md5sum: md5 hash expected from the file's content + content_length: the expected length if provided. Returns: Either none if no error or a dictionary with a key error @@ -347,13 +359,13 @@ def _binary_upload( self, - request, - headers, - collection_name, - deposit_id=None, - replace_metadata=False, - replace_archives=False, - ): + request: Request, + headers: Dict[str, Any], + collection_name: str, + deposit_id: Optional[int] = None, + replace_metadata: bool = False, + replace_archives: bool = False, + ) -> Dict[str, Any]: """Binary upload routine. Other than such a request, a 415 response is returned. @@ -447,7 +459,7 @@ "archive": filehandler.name, } - def _read_metadata(self, metadata_stream): + def _read_metadata(self, metadata_stream) -> Tuple[bytes, Dict[str, Any]]: """Given a metadata stream, reads the metadata and returns both the parsed and the raw metadata. @@ -458,13 +470,13 @@ def _multipart_upload( self, - request, - headers, - collection_name, - deposit_id=None, - replace_metadata=False, - replace_archives=False, - ): + request: Request, + headers: Dict[str, Any], + collection_name: str, + deposit_id: Optional[int] = None, + replace_metadata: bool = False, + replace_archives: bool = False, + ) -> Dict: """Multipart upload supported with exactly: - 1 archive (zip) - 1 atom entry @@ -474,13 +486,13 @@ Args: request (Request): the request holding information to parse and inject in db - headers (dict): request headers formatted - collection_name (str): the associated client - deposit_id (id): deposit identifier if provided - replace_metadata (bool): 'Update or add' request to existing + headers: request headers formatted + collection_name: the associated client + deposit_id: deposit identifier if provided + replace_metadata: 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. - replace_archives (bool): 'Update or add' request to existing + replace_archives: 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. @@ -507,14 +519,15 @@ content_types_present = set() - data = { + data: Dict[str, Optional[Any]] = { "application/zip": None, # expected either zip "application/x-tar": None, # or x-tar "application/atom+xml": None, } for key, value in request.FILES.items(): fh = value - if fh.content_type in content_types_present: + content_type = fh.content_type + if content_type in content_types_present: return make_error_dict( ERROR_CONTENT, "Only 1 application/zip (or application/x-tar) archive " @@ -525,8 +538,9 @@ "header in the multipart deposit", ) - content_types_present.add(fh.content_type) - data[fh.content_type] = fh + content_types_present.add(content_type) + assert content_type is not None + data[content_type] = fh if len(content_types_present) != 2: return make_error_dict( @@ -576,6 +590,7 @@ deposit, deposit_request_data, replace_metadata, replace_archives ) + assert filehandler is not None return { "deposit_id": deposit.id, "deposit_date": deposit.reception_date, @@ -585,25 +600,25 @@ def _atom_entry( self, - request, - headers, - collection_name, - deposit_id=None, - replace_metadata=False, - replace_archives=False, - ): + request: Request, + headers: Dict[str, Any], + collection_name: str, + deposit_id: Optional[int] = None, + replace_metadata: bool = False, + replace_archives: bool = False, + ) -> Dict[str, Any]: """Atom entry deposit. Args: request (Request): the request holding information to parse and inject in db - headers (dict): request headers formatted - collection_name (str): the associated client - deposit_id (id): deposit identifier if provided - replace_metadata (bool): 'Update or add' request to existing + headers: request headers formatted + collection_name: the associated client + deposit_id: deposit identifier if provided + replace_metadata: 'Update or add' request to existing deposit. If False (default), this adds new metadata request to existing ones. Otherwise, this will replace existing metadata. - replace_archives (bool): 'Update or add' request to existing + replace_archives: 'Update or add' request to existing deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. @@ -644,6 +659,10 @@ external_id = metadata.get("external_identifier", headers["slug"]) + # TODO: Determine if we are in the metadata-only deposit case. If it is, then + # save deposit and deposit request typed 'metadata' and send metadata to the + # metadata storage. Otherwise, do as existing deposit. + deposit = self._deposit_put( request, deposit_id=deposit_id, @@ -665,15 +684,17 @@ "status": deposit.status, } - def _empty_post(self, request, headers, collection_name, deposit_id): + def _empty_post( + self, request: Request, headers: Dict, collection_name: str, deposit_id: int + ) -> Dict[str, Any]: """Empty post to finalize an empty deposit. Args: - request (Request): the request holding information to parse + request: the request holding information to parse and inject in db - headers (dict): request headers formatted - collection_name (str): the associated client - deposit_id (id): deposit identifier + headers: request headers formatted + collection_name: the associated client + deposit_id: deposit identifier Returns: Dictionary of result with the deposit's id, the date @@ -692,7 +713,9 @@ "archive": None, } - def _make_iris(self, request, collection_name, deposit_id): + def _make_iris( + self, request: Request, collection_name: str, deposit_id: int + ) -> Dict[str, Any]: """Define the IRI endpoints Args: @@ -710,7 +733,13 @@ for iri in [EM_IRI, EDIT_SE_IRI, CONT_FILE_IRI, STATE_IRI] } - def additional_checks(self, request, headers, collection_name, deposit_id=None): + def additional_checks( + self, + request: Request, + headers: Dict[str, Any], + collection_name: str, + deposit_id: Optional[int] = None, + ) -> Dict[str, Any]: """Permit the child class to enrich additional checks. Returns: @@ -719,22 +748,30 @@ """ return {} - def checks(self, request, collection_name, deposit_id=None): + def checks( + self, request: Request, collection_name: str, deposit_id: Optional[int] = None + ) -> Dict[str, Any]: try: self._collection = DepositCollection.objects.get(name=collection_name) except DepositCollection.DoesNotExist: return make_error_dict( NOT_FOUND, f"Unknown collection name {collection_name}" ) + assert self._collection is not None username = request.user.username if username: # unauthenticated request can have the username empty try: - self._client = DepositClient.objects.get(username=username) + self._client: DepositClient = DepositClient.objects.get( # type: ignore + username=username + ) except DepositClient.DoesNotExist: return make_error_dict(NOT_FOUND, f"Unknown client name {username}") - if self._collection.id not in self._client.collections: + collection_id = self._collection.id + collections = self._client.collections + assert collections is not None + if collection_id not in collections: return make_error_dict( FORBIDDEN, f"Client {username} cannot access collection {collection_name}", @@ -762,7 +799,9 @@ return {"headers": headers} - def restrict_access(self, request, deposit=None): + def restrict_access( + self, request: Request, deposit: Optional[Deposit] = None + ) -> Dict[str, Any]: if deposit: if request.method != "GET" and deposit.status != DEPOSIT_STATUS_PARTIAL: summary = "You can only act on deposit with status '%s'" % ( @@ -772,24 +811,33 @@ return make_error_dict( BAD_REQUEST, summary=summary, verbose_description=description ) + return {} - def _basic_not_allowed_method(self, request, method): + def _basic_not_allowed_method(self, request: Request, method: str): return make_error_response( request, METHOD_NOT_ALLOWED, f"{method} method is not supported on this endpoint", ) - def get(self, request, *args, **kwargs): + def get( + self, request: Request, collection_name: str, deposit_id: int + ) -> Union[HttpResponse, FileResponse]: return self._basic_not_allowed_method(request, "GET") - def post(self, request, *args, **kwargs): + def post( + self, request: Request, collection_name: str, deposit_id: Optional[int] = None + ) -> HttpResponse: return self._basic_not_allowed_method(request, "POST") - def put(self, request, *args, **kwargs): + def put( + self, request: Request, collection_name: str, deposit_id: int + ) -> HttpResponse: return self._basic_not_allowed_method(request, "PUT") - def delete(self, request, *args, **kwargs): + def delete( + self, request: Request, collection_name: str, deposit_id: Optional[int] = None + ) -> HttpResponse: return self._basic_not_allowed_method(request, "DELETE") @@ -798,7 +846,9 @@ """ - def get(self, request, collection_name, deposit_id, format=None): + def get( + self, request: Request, collection_name: str, deposit_id: int + ) -> Union[HttpResponse, FileResponse]: """Endpoint to create/add resources to deposit. Returns: @@ -813,14 +863,22 @@ r = self.process_get(request, collection_name, deposit_id) - if isinstance(r, tuple): - status, content, content_type = r - return HttpResponse(content, status=status, content_type=content_type) - - return r + status, content, content_type = r + if content_type == "swh/generator": + with content as path: + return FileResponse( + open(path, "rb"), status=status, content_type="application/zip" + ) + if content_type == "application/json": + return HttpResponse( + json.dumps(content), status=status, content_type=content_type + ) + return HttpResponse(content, status=status, content_type=content_type) @abstractmethod - def process_get(self, request, collection_name, deposit_id): + def process_get( + self, request: Request, collection_name: str, deposit_id: int + ) -> Tuple[int, Any, str]: """Routine to deal with the deposit's get processing. Returns: @@ -835,7 +893,9 @@ """ - def post(self, request, collection_name, deposit_id=None, format=None): + def post( + self, request: Request, collection_name: str, deposit_id: Optional[int] = None + ) -> HttpResponse: """Endpoint to create/add resources to deposit. Returns: @@ -867,11 +927,17 @@ content_type="application/xml", status=_status, ) - response._headers["location"] = "Location", data[_iri_key] + response._headers["location"] = "Location", data[_iri_key] # type: ignore return response @abstractmethod - def process_post(self, request, headers, collection_name, deposit_id=None): + def process_post( + self, + request, + headers: Dict, + collection_name: str, + deposit_id: Optional[int] = None, + ) -> Tuple[int, str, Dict]: """Routine to deal with the deposit's processing. Returns @@ -889,7 +955,9 @@ """ - def put(self, request, collection_name, deposit_id, format=None): + def put( + self, request: Request, collection_name: str, deposit_id: int + ) -> HttpResponse: """Endpoint to update deposit resources. Returns: @@ -912,7 +980,9 @@ return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod - def process_put(self, request, headers, collection_name, deposit_id): + def process_put( + self, request: Request, headers: Dict, collection_name: str, deposit_id: int + ) -> Dict[str, Any]: """Routine to deal with updating a deposit in some way. Returns @@ -927,7 +997,9 @@ """ - def delete(self, request, collection_name, deposit_id): + def delete( + self, request: Request, collection_name: str, deposit_id: Optional[int] = None + ) -> HttpResponse: """Endpoint to delete some deposit's resources (archives, deposit). Returns: @@ -940,6 +1012,7 @@ if "error" in checks: return make_error_response_from_dict(request, checks["error"]) + assert deposit_id is not None data = self.process_delete(request, collection_name, deposit_id) error = data.get("error") if error: @@ -948,11 +1021,13 @@ return HttpResponse(status=status.HTTP_204_NO_CONTENT) @abstractmethod - def process_delete(self, request, collection_name, deposit_id): + def process_delete( + self, request: Request, collection_name: str, deposit_id: int + ) -> Dict: """Routine to delete a resource. This is mostly not allowed except for the EM_IRI (cf. .api.deposit_update.APIUpdateArchive) """ - pass + return {} diff --git a/swh/deposit/api/deposit.py b/swh/deposit/api/deposit.py --- a/swh/deposit/api/deposit.py +++ b/swh/deposit/api/deposit.py @@ -3,6 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from typing import Any, Dict, Optional, Tuple + from rest_framework import status from ..config import EDIT_SE_IRI @@ -32,7 +34,13 @@ SWHAtomEntryParser, ) - def additional_checks(self, req, headers, collection_name, deposit_id=None): + def additional_checks( + self, + req, + headers: Dict[str, Any], + collection_name: str, + deposit_id: Optional[int] = None, + ) -> Dict[str, Any]: slug = headers["slug"] if not slug: msg = "Missing SLUG header in request" @@ -41,7 +49,13 @@ return {} - def process_post(self, req, headers, collection_name, deposit_id=None): + def process_post( + self, + req, + headers: Dict[str, Any], + collection_name: str, + deposit_id: Optional[int] = None, + ) -> Tuple[int, str, Dict[str, Any]]: """Create a first deposit as: - archive deposit (1 zip) - multipart (1 zip + 1 atom entry) diff --git a/swh/deposit/api/deposit_content.py b/swh/deposit/api/deposit_content.py --- a/swh/deposit/api/deposit_content.py +++ b/swh/deposit/api/deposit_content.py @@ -3,6 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from django.http import HttpResponse from django.shortcuts import render from rest_framework import status @@ -12,7 +13,7 @@ class APIContent(APIBase): - def get(self, req, collection_name, deposit_id, format=None): + def get(self, req, collection_name: str, deposit_id: int) -> HttpResponse: checks = self.checks(req, collection_name, deposit_id) if "error" in checks: return make_error_response_from_dict(req, checks["error"]) diff --git a/swh/deposit/api/deposit_status.py b/swh/deposit/api/deposit_status.py --- a/swh/deposit/api/deposit_status.py +++ b/swh/deposit/api/deposit_status.py @@ -3,6 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from django.http import HttpResponse from django.shortcuts import render from rest_framework import status @@ -21,7 +22,7 @@ """ - def get(self, req, collection_name, deposit_id, format=None): + def get(self, req, collection_name: str, deposit_id: int) -> HttpResponse: checks = self.checks(req, collection_name, deposit_id) if "error" in checks: return make_error_response_from_dict(req, checks["error"]) diff --git a/swh/deposit/api/deposit_update.py b/swh/deposit/api/deposit_update.py --- a/swh/deposit/api/deposit_update.py +++ b/swh/deposit/api/deposit_update.py @@ -3,6 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from typing import Any, Dict, Optional, Tuple + from rest_framework import status from ..config import CONT_FILE_IRI, EDIT_SE_IRI, EM_IRI @@ -30,7 +32,9 @@ SWHFileUploadTarParser, ) - def process_put(self, req, headers, collection_name, deposit_id): + def process_put( + self, req, headers, collection_name: str, deposit_id: int + ) -> Dict[str, Any]: """Replace existing content for the existing deposit. source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_binary # noqa @@ -49,7 +53,9 @@ req, headers, collection_name, deposit_id=deposit_id, replace_archives=True ) - def process_post(self, req, headers, collection_name, deposit_id): + def process_post( + self, req, headers: Dict, collection_name: str, deposit_id: Optional[int] = None + ) -> Tuple[int, str, Dict]: """Add new content to the existing deposit. source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_mediaresource # noqa @@ -65,7 +71,8 @@ msg = "Packaging format supported is restricted to %s" % ( ", ".join(ACCEPT_ARCHIVE_CONTENT_TYPES) ) - return "unused", "unused", make_error_dict(BAD_REQUEST, msg) + unused = 0 + return unused, "unused", make_error_dict(BAD_REQUEST, msg) return ( status.HTTP_201_CREATED, @@ -73,7 +80,7 @@ self._binary_upload(req, headers, collection_name, deposit_id), ) - def process_delete(self, req, collection_name, deposit_id): + def process_delete(self, req, collection_name: str, deposit_id: int) -> Dict: """Delete content (archives) from existing deposit. source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_deletingcontent # noqa @@ -96,7 +103,9 @@ parser_classes = (SWHMultiPartParser, SWHAtomEntryParser) - def process_put(self, req, headers, collection_name, deposit_id): + def process_put( + self, req, headers: Dict, collection_name: str, deposit_id: int + ) -> Dict[str, Any]: """Replace existing deposit's metadata/archive with new ones. source: @@ -120,7 +129,13 @@ req, headers, collection_name, deposit_id=deposit_id, replace_metadata=True ) - def process_post(self, req, headers, collection_name, deposit_id): + def process_post( + self, + request, + headers: Dict, + collection_name: str, + deposit_id: Optional[int] = None, + ) -> Tuple[int, str, Dict]: """Add new metadata/archive to existing deposit. source: @@ -139,28 +154,29 @@ For the empty post case, this returns a 200. """ - if req.content_type.startswith("multipart/"): + assert deposit_id is not None + if request.content_type.startswith("multipart/"): return ( status.HTTP_201_CREATED, EM_IRI, self._multipart_upload( - req, headers, collection_name, deposit_id=deposit_id + request, headers, collection_name, deposit_id=deposit_id ), ) # check for final empty post # source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html # #continueddeposit_complete if headers["content-length"] == 0 and headers["in-progress"] is False: - data = self._empty_post(req, headers, collection_name, deposit_id) + data = self._empty_post(request, headers, collection_name, deposit_id) return (status.HTTP_200_OK, EDIT_SE_IRI, data) return ( status.HTTP_201_CREATED, EM_IRI, - self._atom_entry(req, headers, collection_name, deposit_id=deposit_id), + self._atom_entry(request, headers, collection_name, deposit_id=deposit_id), ) - def process_delete(self, req, collection_name, deposit_id): + def process_delete(self, req, collection_name: str, deposit_id: int) -> Dict: """Delete the container (deposit). source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_deleteconteiner # noqa diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -86,23 +86,11 @@ return {"headers": headers} def get( - self, - request, - collection_name=None, - deposit_id=None, - format=None, - *args, - **kwargs, + self, request, collection_name=None, deposit_id=None, *args, **kwargs, ): - return super().get(request, collection_name, deposit_id, format) + return super().get(request, collection_name, deposit_id) def put( - self, - request, - collection_name=None, - deposit_id=None, - format=None, - *args, - **kwargs, + self, request, collection_name=None, deposit_id=None, *args, **kwargs, ): - return super().put(request, collection_name, deposit_id, format) + return super().put(request, collection_name, deposit_id) diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -4,10 +4,10 @@ # See top-level LICENSE file for more information from itertools import chain -import json import re from shutil import get_unpack_formats import tarfile +from typing import Dict, Optional, Tuple import zipfile from rest_framework import status @@ -16,7 +16,7 @@ from . import APIPrivateView, DepositReadMixin from ...config import ARCHIVE_TYPE, DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_VERIFIED -from ...models import Deposit +from ...models import Deposit, DepositRequest from ..common import APIGet MANDATORY_FIELDS_MISSING = "Mandatory fields are missing" @@ -60,7 +60,7 @@ """ - def _check_deposit_archives(self, deposit): + def _check_deposit_archives(self, deposit: Deposit) -> Tuple[bool, Optional[Dict]]: """Given a deposit, check each deposit request of type archive. Args: @@ -87,7 +87,9 @@ return True, None return False, {"archive": errors} - def _check_archive(self, archive_request): + def _check_archive( + self, archive_request: DepositRequest + ) -> Tuple[bool, Optional[str]]: """Check that a deposit associated archive is ok: - readable - supported archive format @@ -111,11 +113,11 @@ try: if zipfile.is_zipfile(archive_path): - with zipfile.ZipFile(archive_path) as f: - files = f.namelist() + with zipfile.ZipFile(archive_path) as zipfile_: + files = zipfile_.namelist() elif tarfile.is_tarfile(archive_path): - with tarfile.open(archive_path) as f: - files = f.getnames() + with tarfile.open(archive_path) as tarfile_: + files = tarfile_.getnames() else: return False, MANDATORY_ARCHIVE_UNSUPPORTED except Exception: @@ -128,7 +130,7 @@ return False, MANDATORY_ARCHIVE_INVALID return True, None - def _check_metadata(self, metadata): + def _check_metadata(self, metadata: Dict) -> Tuple[bool, Optional[Dict]]: """Check to execute on all metadata for mandatory field presence. Args: @@ -174,7 +176,9 @@ ) return False, {"metadata": detail} - def process_get(self, req, collection_name, deposit_id): + def process_get( + self, req, collection_name: str, deposit_id: int + ) -> Tuple[int, Dict, str]: """Build a unique tarball from the multiple received and stream that content to the client. @@ -189,15 +193,17 @@ """ deposit = Deposit.objects.get(pk=deposit_id) metadata = self._metadata_get(deposit) - problems = {} + problems: Dict = {} # will check each deposit's associated request (both of type # archive and metadata) for errors archives_status, error_detail = self._check_deposit_archives(deposit) if not archives_status: + assert error_detail is not None problems.update(error_detail) metadata_status, error_detail = self._check_metadata(metadata) if not metadata_status: + assert error_detail is not None problems.update(error_detail) deposit_status = archives_status and metadata_status @@ -225,4 +231,4 @@ deposit.save() - return status.HTTP_200_OK, json.dumps(response), "application/json" + return status.HTTP_200_OK, response, "application/json" diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -4,12 +4,11 @@ # See top-level LICENSE file for more information from contextlib import contextmanager -import json import os import shutil import tempfile +from typing import Any, Dict, Tuple -from django.http import FileResponse from rest_framework import status from swh.core import tarball @@ -74,14 +73,16 @@ if not os.path.exists(self.extraction_dir): os.makedirs(self.extraction_dir) - def process_get(self, request, collection_name, deposit_id): + def process_get( + self, request, collection_name: str, deposit_id: int + ) -> Tuple[int, Any, str]: """Build a unique tarball from the multiple received and stream that content to the client. Args: request (Request): - collection_name (str): Collection owning the deposit - deposit_id (id): Deposit concerned by the reading + collection_name: Collection owning the deposit + deposit_id: Deposit concerned by the reading Returns: Tuple status, stream of content, content-type @@ -91,12 +92,11 @@ r.archive.path for r in self._deposit_requests(deposit_id, request_type=ARCHIVE_TYPE) ] - with aggregate_tarballs(self.extraction_dir, archive_paths) as path: - return FileResponse( - open(path, "rb"), - status=status.HTTP_200_OK, - content_type="application/zip", - ) + return ( + status.HTTP_200_OK, + aggregate_tarballs(self.extraction_dir, archive_paths), + "swh/generator", + ) class APIReadMetadata(APIPrivateView, APIGet, DepositReadMixin): @@ -187,11 +187,9 @@ return data - def process_get(self, request, collection_name, deposit_id): + def process_get( + self, request, collection_name: str, deposit_id: int + ) -> Tuple[int, Dict, str]: deposit = Deposit.objects.get(pk=deposit_id) data = self.metadata_read(deposit) - d = {} - if data: - d = json.dumps(data) - - return status.HTTP_200_OK, d, "application/json" + return status.HTTP_200_OK, data if data else {}, "application/json" diff --git a/swh/deposit/api/private/deposit_update_status.py b/swh/deposit/api/private/deposit_update_status.py --- a/swh/deposit/api/private/deposit_update_status.py +++ b/swh/deposit/api/private/deposit_update_status.py @@ -3,6 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from typing import Dict + from rest_framework.parsers import JSONParser from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, swhid @@ -61,7 +63,9 @@ return {} - def process_put(self, request, headers, collection_name, deposit_id): + def process_put( + self, request, headers: Dict, collection_name: str, deposit_id: int + ) -> Dict: """Update the deposit with status and SWHIDs Returns: