Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9697254
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
80 KB
Subscribers
None
View Options
diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
index 6f696078..6bed49c5 100644
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -1,958 +1,1033 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from abc import ABCMeta, abstractmethod
+import datetime
import hashlib
-from typing import Sequence, Type
+import json
+from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union
-from django.http import HttpResponse
+from django.http import FileResponse, HttpResponse
from django.shortcuts import render
from django.urls import reverse
from django.utils import timezone
from rest_framework import status
from rest_framework.authentication import BaseAuthentication, BasicAuthentication
from rest_framework.permissions import BasePermission, IsAuthenticated
+from rest_framework.request import Request
from rest_framework.views import APIView
from swh.model import hashutil
from swh.scheduler.utils import create_oneshot_task_dict
from ..config import (
ARCHIVE_KEY,
ARCHIVE_TYPE,
CONT_FILE_IRI,
DEPOSIT_STATUS_DEPOSITED,
DEPOSIT_STATUS_LOAD_SUCCESS,
DEPOSIT_STATUS_PARTIAL,
EDIT_SE_IRI,
EM_IRI,
METADATA_KEY,
METADATA_TYPE,
RAW_METADATA_KEY,
STATE_IRI,
APIConfig,
)
from ..errors import (
BAD_REQUEST,
CHECKSUM_MISMATCH,
ERROR_CONTENT,
FORBIDDEN,
MAX_UPLOAD_SIZE_EXCEEDED,
MEDIATION_NOT_ALLOWED,
METHOD_NOT_ALLOWED,
NOT_FOUND,
PARSING_ERROR,
ParserError,
make_error_dict,
make_error_response,
make_error_response_from_dict,
)
from ..models import Deposit, DepositClient, DepositCollection, DepositRequest
from ..parsers import parse_xml
ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"]
ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"]
class AuthenticatedAPIView(APIView):
"""Mixin intended as a based API view to enforce the basic
authentication check
"""
authentication_classes: Sequence[Type[BaseAuthentication]] = (BasicAuthentication,)
permission_classes: Sequence[Type[BasePermission]] = (IsAuthenticated,)
class APIBase(APIConfig, AuthenticatedAPIView, metaclass=ABCMeta):
"""Base deposit request class sharing multiple common behaviors.
"""
- def _read_headers(self, request):
+ def _read_headers(self, request: Request) -> Dict[str, Any]:
"""Read and unify the necessary headers from the request (those are
not stored in the same location or not properly formatted).
Args:
request (Request): Input request
Returns:
Dictionary with the following keys (some associated values may be
None):
- content-type
- content-length
- in-progress
- content-disposition
- packaging
- slug
- on-behalf-of
"""
meta = request._request.META
content_type = request.content_type
content_length = meta.get("CONTENT_LENGTH")
if content_length and isinstance(content_length, str):
content_length = int(content_length)
# final deposit if not provided
in_progress = meta.get("HTTP_IN_PROGRESS", False)
content_disposition = meta.get("HTTP_CONTENT_DISPOSITION")
if isinstance(in_progress, str):
in_progress = in_progress.lower() == "true"
content_md5sum = meta.get("HTTP_CONTENT_MD5")
if content_md5sum:
content_md5sum = bytes.fromhex(content_md5sum)
packaging = meta.get("HTTP_PACKAGING")
slug = meta.get("HTTP_SLUG")
on_behalf_of = meta.get("HTTP_ON_BEHALF_OF")
metadata_relevant = meta.get("HTTP_METADATA_RELEVANT")
return {
"content-type": content_type,
"content-length": content_length,
"in-progress": in_progress,
"content-disposition": content_disposition,
"content-md5sum": content_md5sum,
"packaging": packaging,
"slug": slug,
"on-behalf-of": on_behalf_of,
"metadata-relevant": metadata_relevant,
}
- def _compute_md5(self, filehandler):
+ def _compute_md5(self, filehandler) -> bytes:
"""Compute uploaded file's md5 sum.
Args:
filehandler (InMemoryUploadedFile): the file to compute the md5
hash
Returns:
the md5 checksum (bytes)
"""
h = hashlib.md5()
for chunk in filehandler:
h.update(chunk)
return h.digest()
def _deposit_put(
- self, request, deposit_id=None, in_progress=False, external_id=None
- ):
+ self,
+ request: Request,
+ deposit_id: Optional[int] = None,
+ in_progress: bool = False,
+ external_id: Optional[str] = None,
+ ) -> Deposit:
"""Save/Update a deposit in db.
Args:
- deposit_id (int): deposit identifier
- in_progress (dict): The deposit's status
- external_id (str): The external identifier to associate to
- the deposit
+ request: request data
+ deposit_id: deposit identifier
+ in_progress: deposit status
+ external_id: external identifier to associate to the deposit
Returns:
The Deposit instance saved or updated.
"""
+ complete_date: Optional[datetime.datetime] = None
+ deposit_parent: Optional[Deposit] = None
+
if in_progress is False:
complete_date = timezone.now()
status_type = DEPOSIT_STATUS_DEPOSITED
else:
- complete_date = None
status_type = DEPOSIT_STATUS_PARTIAL
if not deposit_id:
try:
- # find a deposit parent (same external id, status load
- # to success)
+ # find a deposit parent (same external id, status load to success)
deposit_parent = (
Deposit.objects.filter(
external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS
)
.order_by("-id")[0:1]
.get()
) # noqa
except Deposit.DoesNotExist:
- deposit_parent = None
+ # then no parent for that deposit, deposit_parent already None
+ pass
+ assert external_id is not None
deposit = Deposit(
collection=self._collection,
external_id=external_id,
complete_date=complete_date,
status=status_type,
client=self._client,
parent=deposit_parent,
)
else:
deposit = Deposit.objects.get(pk=deposit_id)
# update metadata
deposit.complete_date = complete_date
deposit.status = status_type
if self.config["checks"]:
deposit.save() # needed to have a deposit id
scheduler = self.scheduler
if deposit.status == DEPOSIT_STATUS_DEPOSITED and not deposit.check_task_id:
task = create_oneshot_task_dict(
"check-deposit",
collection=deposit.collection.name,
deposit_id=deposit.id,
)
check_task_id = scheduler.create_tasks([task])[0]["id"]
deposit.check_task_id = check_task_id
deposit.save()
return deposit
def _deposit_request_put(
self,
- deposit,
- deposit_request_data,
- replace_metadata=False,
- replace_archives=False,
- ):
+ deposit: Deposit,
+ deposit_request_data: Dict[str, Any],
+ replace_metadata: bool = False,
+ replace_archives: bool = False,
+ ) -> None:
"""Save a deposit request with metadata attached to a deposit.
Args:
- deposit (Deposit): The deposit concerned by the request
- deposit_request_data (dict): The dictionary with at most 2 deposit
- request types (archive, metadata) to associate to the deposit
- replace_metadata (bool): Flag defining if we add or update
+ deposit: The deposit concerned by the request
+ deposit_request_data: The dictionary with at most 2 deposit
+ request types (archive, metadata) to associate to the deposit
+ replace_metadata: Flag defining if we add or update
existing metadata to the deposit
- replace_archives (bool): Flag defining if we add or update
+ replace_archives: Flag defining if we add or update
archives to existing deposit
Returns:
None
"""
if replace_metadata:
DepositRequest.objects.filter(deposit=deposit, type=METADATA_TYPE).delete()
if replace_archives:
DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete()
deposit_request = None
archive_file = deposit_request_data.get(ARCHIVE_KEY)
if archive_file:
deposit_request = DepositRequest(
type=ARCHIVE_TYPE, deposit=deposit, archive=archive_file
)
deposit_request.save()
metadata = deposit_request_data.get(METADATA_KEY)
if metadata:
- raw_metadata = deposit_request_data.get(RAW_METADATA_KEY)
+ raw_metadata = deposit_request_data[RAW_METADATA_KEY]
deposit_request = DepositRequest(
type=METADATA_TYPE,
deposit=deposit,
metadata=metadata,
raw_metadata=raw_metadata.decode("utf-8"),
)
deposit_request.save()
assert deposit_request is not None
- def _delete_archives(self, collection_name, deposit_id):
- """Delete archives reference from the deposit id.
+ def _delete_archives(self, collection_name: str, deposit_id: int) -> Dict:
+ """Delete archive references from the deposit id.
"""
try:
deposit = Deposit.objects.get(pk=deposit_id)
except Deposit.DoesNotExist:
return make_error_dict(
NOT_FOUND, f"The deposit {deposit_id} does not exist"
)
DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete()
return {}
- def _delete_deposit(self, collection_name, deposit_id):
+ def _delete_deposit(self, collection_name: str, deposit_id: int) -> Dict:
"""Delete deposit reference.
Args:
- collection_name (str): Client's name
- deposit_id (id): The deposit to delete
+ collection_name: Client's collection
+ deposit_id: The deposit to delete
Returns
Empty dict when ok.
Dict with error key to describe the failure.
"""
try:
deposit = Deposit.objects.get(pk=deposit_id)
except Deposit.DoesNotExist:
return make_error_dict(
NOT_FOUND, f"The deposit {deposit_id} does not exist"
)
if deposit.collection.name != collection_name:
summary = "Cannot delete a deposit from another collection"
description = "Deposit %s does not belong to the collection %s" % (
deposit_id,
collection_name,
)
return make_error_dict(
BAD_REQUEST, summary=summary, verbose_description=description
)
DepositRequest.objects.filter(deposit=deposit).delete()
deposit.delete()
return {}
- def _check_preconditions_on(self, filehandler, md5sum, content_length=None):
+ def _check_preconditions_on(
+ self, filehandler, md5sum: str, content_length: Optional[int] = None
+ ) -> Optional[Dict]:
"""Check preconditions on provided file are respected. That is the
length and/or the md5sum hash match the file's content.
Args:
filehandler (InMemoryUploadedFile): The file to check
- md5sum (hex str): md5 hash expected from the file's content
- content_length (int): the expected length if provided.
+ md5sum: md5 hash expected from the file's content
+ content_length: the expected length if provided.
Returns:
Either none if no error or a dictionary with a key error
detailing the problem.
"""
max_upload_size = self.config["max_upload_size"]
if content_length:
if content_length > max_upload_size:
return make_error_dict(
MAX_UPLOAD_SIZE_EXCEEDED,
f"Upload size limit exceeded (max {max_upload_size} bytes)."
"Please consider sending the archive in multiple steps.",
)
length = filehandler.size
if length != content_length:
return make_error_dict(
status.HTTP_412_PRECONDITION_FAILED, "Wrong length"
)
if md5sum:
_md5sum = self._compute_md5(filehandler)
if _md5sum != md5sum:
return make_error_dict(
CHECKSUM_MISMATCH,
"Wrong md5 hash",
f"The checksum sent {hashutil.hash_to_hex(md5sum)} and the actual "
f"checksum {hashutil.hash_to_hex(_md5sum)} does not match.",
)
return None
def _binary_upload(
self,
- request,
- headers,
- collection_name,
- deposit_id=None,
- replace_metadata=False,
- replace_archives=False,
- ):
+ request: Request,
+ headers: Dict[str, Any],
+ collection_name: str,
+ deposit_id: Optional[int] = None,
+ replace_metadata: bool = False,
+ replace_archives: bool = False,
+ ) -> Dict[str, Any]:
"""Binary upload routine.
Other than such a request, a 415 response is returned.
Args:
request (Request): the request holding information to parse
and inject in db
headers (dict): request headers formatted
collection_name (str): the associated client
deposit_id (id): deposit identifier if provided
replace_metadata (bool): 'Update or add' request to existing
deposit. If False (default), this adds new metadata request to
existing ones. Otherwise, this will replace existing metadata.
replace_archives (bool): 'Update or add' request to existing
deposit. If False (default), this adds new archive request to
existing ones. Otherwise, this will replace existing archives.
Returns:
In the optimal case a dict with the following keys:
- deposit_id (int): Deposit identifier
- deposit_date (date): Deposit date
- archive: the archive file's name
Otherwise, a dictionary with the key error and the
associated failures, either:
- 400 (bad request) if the request is not providing an external
identifier
- 413 (request entity too large) if the length of the
archive exceeds the max size configured
- 412 (precondition failed) if the length or md5 hash provided
mismatch the reality of the archive
- 415 (unsupported media type) if a wrong media type is provided
"""
content_length = headers["content-length"]
if not content_length:
return make_error_dict(
BAD_REQUEST,
"CONTENT_LENGTH header is mandatory",
"For archive deposit, the CONTENT_LENGTH header must be sent.",
)
content_disposition = headers["content-disposition"]
if not content_disposition:
return make_error_dict(
BAD_REQUEST,
"CONTENT_DISPOSITION header is mandatory",
"For archive deposit, the CONTENT_DISPOSITION header must be sent.",
)
packaging = headers["packaging"]
if packaging and packaging not in ACCEPT_PACKAGINGS:
return make_error_dict(
BAD_REQUEST,
f"Only packaging {ACCEPT_PACKAGINGS} is supported",
f"The packaging provided {packaging} is not supported",
)
filehandler = request.FILES["file"]
precondition_status_response = self._check_preconditions_on(
filehandler, headers["content-md5sum"], content_length
)
if precondition_status_response:
return precondition_status_response
external_id = headers["slug"]
# actual storage of data
archive_metadata = filehandler
deposit = self._deposit_put(
request,
deposit_id=deposit_id,
in_progress=headers["in-progress"],
external_id=external_id,
)
self._deposit_request_put(
deposit,
{ARCHIVE_KEY: archive_metadata},
replace_metadata=replace_metadata,
replace_archives=replace_archives,
)
return {
"deposit_id": deposit.id,
"deposit_date": deposit.reception_date,
"status": deposit.status,
"archive": filehandler.name,
}
- def _read_metadata(self, metadata_stream):
+ def _read_metadata(self, metadata_stream) -> Tuple[bytes, Dict[str, Any]]:
"""Given a metadata stream, reads the metadata and returns both the
parsed and the raw metadata.
"""
raw_metadata = metadata_stream.read()
metadata = parse_xml(raw_metadata)
return raw_metadata, metadata
def _multipart_upload(
self,
- request,
- headers,
- collection_name,
- deposit_id=None,
- replace_metadata=False,
- replace_archives=False,
- ):
+ request: Request,
+ headers: Dict[str, Any],
+ collection_name: str,
+ deposit_id: Optional[int] = None,
+ replace_metadata: bool = False,
+ replace_archives: bool = False,
+ ) -> Dict:
"""Multipart upload supported with exactly:
- 1 archive (zip)
- 1 atom entry
Other than such a request, a 415 response is returned.
Args:
request (Request): the request holding information to parse
and inject in db
- headers (dict): request headers formatted
- collection_name (str): the associated client
- deposit_id (id): deposit identifier if provided
- replace_metadata (bool): 'Update or add' request to existing
+ headers: request headers formatted
+ collection_name: the associated client
+ deposit_id: deposit identifier if provided
+ replace_metadata: 'Update or add' request to existing
deposit. If False (default), this adds new metadata request to
existing ones. Otherwise, this will replace existing metadata.
- replace_archives (bool): 'Update or add' request to existing
+ replace_archives: 'Update or add' request to existing
deposit. If False (default), this adds new archive request to
existing ones. Otherwise, this will replace existing archives.
Returns:
In the optimal case a dict with the following keys:
- deposit_id (int): Deposit identifier
- deposit_date (date): Deposit date
- archive: the archive file's name
Otherwise, a dictionary with the key error and the
associated failures, either:
- 400 (bad request) if the request is not providing an external
identifier
- 412 (precondition failed) if the potentially md5 hash provided
mismatch the reality of the archive
- 413 (request entity too large) if the length of the
archive exceeds the max size configured
- 415 (unsupported media type) if a wrong media type is provided
"""
external_id = headers["slug"]
content_types_present = set()
- data = {
+ data: Dict[str, Optional[Any]] = {
"application/zip": None, # expected either zip
"application/x-tar": None, # or x-tar
"application/atom+xml": None,
}
for key, value in request.FILES.items():
fh = value
- if fh.content_type in content_types_present:
+ content_type = fh.content_type
+ if content_type in content_types_present:
return make_error_dict(
ERROR_CONTENT,
"Only 1 application/zip (or application/x-tar) archive "
"and 1 atom+xml entry is supported (as per sword2.0 "
"specification)",
"You provided more than 1 application/(zip|x-tar) "
"or more than 1 application/atom+xml content-disposition "
"header in the multipart deposit",
)
- content_types_present.add(fh.content_type)
- data[fh.content_type] = fh
+ content_types_present.add(content_type)
+ assert content_type is not None
+ data[content_type] = fh
if len(content_types_present) != 2:
return make_error_dict(
ERROR_CONTENT,
"You must provide both 1 application/zip (or "
"application/x-tar) and 1 atom+xml entry for multipart "
"deposit",
"You need to provide only 1 application/(zip|x-tar) "
"and 1 application/atom+xml content-disposition header "
"in the multipart deposit",
)
filehandler = data["application/zip"]
if not filehandler:
filehandler = data["application/x-tar"]
precondition_status_response = self._check_preconditions_on(
filehandler, headers["content-md5sum"]
)
if precondition_status_response:
return precondition_status_response
try:
raw_metadata, metadata = self._read_metadata(data["application/atom+xml"])
except ParserError:
return make_error_dict(
PARSING_ERROR,
"Malformed xml metadata",
"The xml received is malformed. "
"Please ensure your metadata file is correctly formatted.",
)
# actual storage of data
deposit = self._deposit_put(
request,
deposit_id=deposit_id,
in_progress=headers["in-progress"],
external_id=external_id,
)
deposit_request_data = {
ARCHIVE_KEY: filehandler,
METADATA_KEY: metadata,
RAW_METADATA_KEY: raw_metadata,
}
self._deposit_request_put(
deposit, deposit_request_data, replace_metadata, replace_archives
)
+ assert filehandler is not None
return {
"deposit_id": deposit.id,
"deposit_date": deposit.reception_date,
"archive": filehandler.name,
"status": deposit.status,
}
def _atom_entry(
self,
- request,
- headers,
- collection_name,
- deposit_id=None,
- replace_metadata=False,
- replace_archives=False,
- ):
+ request: Request,
+ headers: Dict[str, Any],
+ collection_name: str,
+ deposit_id: Optional[int] = None,
+ replace_metadata: bool = False,
+ replace_archives: bool = False,
+ ) -> Dict[str, Any]:
"""Atom entry deposit.
Args:
request (Request): the request holding information to parse
and inject in db
- headers (dict): request headers formatted
- collection_name (str): the associated client
- deposit_id (id): deposit identifier if provided
- replace_metadata (bool): 'Update or add' request to existing
+ headers: request headers formatted
+ collection_name: the associated client
+ deposit_id: deposit identifier if provided
+ replace_metadata: 'Update or add' request to existing
deposit. If False (default), this adds new metadata request to
existing ones. Otherwise, this will replace existing metadata.
- replace_archives (bool): 'Update or add' request to existing
+ replace_archives: 'Update or add' request to existing
deposit. If False (default), this adds new archive request to
existing ones. Otherwise, this will replace existing archives.
Returns:
In the optimal case a dict with the following keys:
- deposit_id: deposit id associated to the deposit
- deposit_date: date of the deposit
- archive: None (no archive is provided here)
Otherwise, a dictionary with the key error and the
associated failures, either:
- 400 (bad request) if the request is not providing an external
identifier
- 400 (bad request) if the request's body is empty
- 415 (unsupported media type) if a wrong media type is provided
"""
try:
raw_metadata, metadata = self._read_metadata(request.data)
except ParserError:
return make_error_dict(
BAD_REQUEST,
"Malformed xml metadata",
"The xml received is malformed. "
"Please ensure your metadata file is correctly formatted.",
)
if not metadata:
return make_error_dict(
BAD_REQUEST,
"Empty body request is not supported",
"Atom entry deposit is supposed to send for metadata. "
"If the body is empty, there is no metadata.",
)
external_id = metadata.get("external_identifier", headers["slug"])
+ # TODO: Determine if we are in the metadata-only deposit case. If it is, then
+ # save deposit and deposit request typed 'metadata' and send metadata to the
+ # metadata storage. Otherwise, do as existing deposit.
+
deposit = self._deposit_put(
request,
deposit_id=deposit_id,
in_progress=headers["in-progress"],
external_id=external_id,
)
self._deposit_request_put(
deposit,
{METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata},
replace_metadata,
replace_archives,
)
return {
"deposit_id": deposit.id,
"deposit_date": deposit.reception_date,
"archive": None,
"status": deposit.status,
}
- def _empty_post(self, request, headers, collection_name, deposit_id):
+ def _empty_post(
+ self, request: Request, headers: Dict, collection_name: str, deposit_id: int
+ ) -> Dict[str, Any]:
"""Empty post to finalize an empty deposit.
Args:
- request (Request): the request holding information to parse
+ request: the request holding information to parse
and inject in db
- headers (dict): request headers formatted
- collection_name (str): the associated client
- deposit_id (id): deposit identifier
+ headers: request headers formatted
+ collection_name: the associated client
+ deposit_id: deposit identifier
Returns:
Dictionary of result with the deposit's id, the date
it was completed and no archive.
"""
deposit = Deposit.objects.get(pk=deposit_id)
deposit.complete_date = timezone.now()
deposit.status = DEPOSIT_STATUS_DEPOSITED
deposit.save()
return {
"deposit_id": deposit_id,
"deposit_date": deposit.complete_date,
"status": deposit.status,
"archive": None,
}
- def _make_iris(self, request, collection_name, deposit_id):
+ def _make_iris(
+ self, request: Request, collection_name: str, deposit_id: int
+ ) -> Dict[str, Any]:
"""Define the IRI endpoints
Args:
request (Request): The initial request
collection_name (str): client/collection's name
deposit_id (id): Deposit identifier
Returns:
Dictionary of keys with the iris' urls.
"""
args = [collection_name, deposit_id]
return {
iri: request.build_absolute_uri(reverse(iri, args=args))
for iri in [EM_IRI, EDIT_SE_IRI, CONT_FILE_IRI, STATE_IRI]
}
- def additional_checks(self, request, headers, collection_name, deposit_id=None):
+ def additional_checks(
+ self,
+ request: Request,
+ headers: Dict[str, Any],
+ collection_name: str,
+ deposit_id: Optional[int] = None,
+ ) -> Dict[str, Any]:
"""Permit the child class to enrich additional checks.
Returns:
dict with 'error' detailing the problem.
"""
return {}
- def checks(self, request, collection_name, deposit_id=None):
+ def checks(
+ self, request: Request, collection_name: str, deposit_id: Optional[int] = None
+ ) -> Dict[str, Any]:
try:
self._collection = DepositCollection.objects.get(name=collection_name)
except DepositCollection.DoesNotExist:
return make_error_dict(
NOT_FOUND, f"Unknown collection name {collection_name}"
)
+ assert self._collection is not None
username = request.user.username
if username: # unauthenticated request can have the username empty
try:
- self._client = DepositClient.objects.get(username=username)
+ self._client: DepositClient = DepositClient.objects.get( # type: ignore
+ username=username
+ )
except DepositClient.DoesNotExist:
return make_error_dict(NOT_FOUND, f"Unknown client name {username}")
- if self._collection.id not in self._client.collections:
+ collection_id = self._collection.id
+ collections = self._client.collections
+ assert collections is not None
+ if collection_id not in collections:
return make_error_dict(
FORBIDDEN,
f"Client {username} cannot access collection {collection_name}",
)
if deposit_id:
try:
deposit = Deposit.objects.get(pk=deposit_id)
except Deposit.DoesNotExist:
return make_error_dict(
NOT_FOUND, f"Deposit with id {deposit_id} does not exist"
)
checks = self.restrict_access(request, deposit)
if checks:
return checks
headers = self._read_headers(request)
if headers["on-behalf-of"]:
return make_error_dict(MEDIATION_NOT_ALLOWED, "Mediation is not supported.")
checks = self.additional_checks(request, headers, collection_name, deposit_id)
if "error" in checks:
return checks
return {"headers": headers}
- def restrict_access(self, request, deposit=None):
+ def restrict_access(
+ self, request: Request, deposit: Optional[Deposit] = None
+ ) -> Dict[str, Any]:
if deposit:
if request.method != "GET" and deposit.status != DEPOSIT_STATUS_PARTIAL:
summary = "You can only act on deposit with status '%s'" % (
DEPOSIT_STATUS_PARTIAL,
)
description = f"This deposit has status '{deposit.status}'"
return make_error_dict(
BAD_REQUEST, summary=summary, verbose_description=description
)
+ return {}
- def _basic_not_allowed_method(self, request, method):
+ def _basic_not_allowed_method(self, request: Request, method: str):
return make_error_response(
request,
METHOD_NOT_ALLOWED,
f"{method} method is not supported on this endpoint",
)
- def get(self, request, *args, **kwargs):
+ def get(
+ self, request: Request, collection_name: str, deposit_id: int
+ ) -> Union[HttpResponse, FileResponse]:
return self._basic_not_allowed_method(request, "GET")
- def post(self, request, *args, **kwargs):
+ def post(
+ self, request: Request, collection_name: str, deposit_id: Optional[int] = None
+ ) -> HttpResponse:
return self._basic_not_allowed_method(request, "POST")
- def put(self, request, *args, **kwargs):
+ def put(
+ self, request: Request, collection_name: str, deposit_id: int
+ ) -> HttpResponse:
return self._basic_not_allowed_method(request, "PUT")
- def delete(self, request, *args, **kwargs):
+ def delete(
+ self, request: Request, collection_name: str, deposit_id: Optional[int] = None
+ ) -> HttpResponse:
return self._basic_not_allowed_method(request, "DELETE")
class APIGet(APIBase, metaclass=ABCMeta):
"""Mixin for class to support GET method.
"""
- def get(self, request, collection_name, deposit_id, format=None):
+ def get(
+ self, request: Request, collection_name: str, deposit_id: int
+ ) -> Union[HttpResponse, FileResponse]:
"""Endpoint to create/add resources to deposit.
Returns:
200 response when no error during routine occurred
400 if the deposit does not belong to the collection
404 if the deposit or the collection does not exist
"""
checks = self.checks(request, collection_name, deposit_id)
if "error" in checks:
return make_error_response_from_dict(request, checks["error"])
r = self.process_get(request, collection_name, deposit_id)
- if isinstance(r, tuple):
- status, content, content_type = r
- return HttpResponse(content, status=status, content_type=content_type)
-
- return r
+ status, content, content_type = r
+ if content_type == "swh/generator":
+ with content as path:
+ return FileResponse(
+ open(path, "rb"), status=status, content_type="application/zip"
+ )
+ if content_type == "application/json":
+ return HttpResponse(
+ json.dumps(content), status=status, content_type=content_type
+ )
+ return HttpResponse(content, status=status, content_type=content_type)
@abstractmethod
- def process_get(self, request, collection_name, deposit_id):
+ def process_get(
+ self, request: Request, collection_name: str, deposit_id: int
+ ) -> Tuple[int, Any, str]:
"""Routine to deal with the deposit's get processing.
Returns:
Tuple status, stream of content, content-type
"""
pass
class APIPost(APIBase, metaclass=ABCMeta):
"""Mixin for class to support DELETE method.
"""
- def post(self, request, collection_name, deposit_id=None, format=None):
+ def post(
+ self, request: Request, collection_name: str, deposit_id: Optional[int] = None
+ ) -> HttpResponse:
"""Endpoint to create/add resources to deposit.
Returns:
204 response when no error during routine occurred.
400 if the deposit does not belong to the collection
404 if the deposit or the collection does not exist
"""
checks = self.checks(request, collection_name, deposit_id)
if "error" in checks:
return make_error_response_from_dict(request, checks["error"])
headers = checks["headers"]
_status, _iri_key, data = self.process_post(
request, headers, collection_name, deposit_id
)
error = data.get("error")
if error:
return make_error_response_from_dict(request, error)
data["packagings"] = ACCEPT_PACKAGINGS
iris = self._make_iris(request, collection_name, data["deposit_id"])
data.update(iris)
response = render(
request,
"deposit/deposit_receipt.xml",
context=data,
content_type="application/xml",
status=_status,
)
- response._headers["location"] = "Location", data[_iri_key]
+ response._headers["location"] = "Location", data[_iri_key] # type: ignore
return response
@abstractmethod
- def process_post(self, request, headers, collection_name, deposit_id=None):
+ def process_post(
+ self,
+ request,
+ headers: Dict,
+ collection_name: str,
+ deposit_id: Optional[int] = None,
+ ) -> Tuple[int, str, Dict]:
"""Routine to deal with the deposit's processing.
Returns
Tuple of:
- response status code (200, 201, etc...)
- key iri (EM_IRI, EDIT_SE_IRI, etc...)
- dictionary of the processing result
"""
pass
class APIPut(APIBase, metaclass=ABCMeta):
"""Mixin for class to support PUT method.
"""
- def put(self, request, collection_name, deposit_id, format=None):
+ def put(
+ self, request: Request, collection_name: str, deposit_id: int
+ ) -> HttpResponse:
"""Endpoint to update deposit resources.
Returns:
204 response when no error during routine occurred.
400 if the deposit does not belong to the collection
404 if the deposit or the collection does not exist
"""
checks = self.checks(request, collection_name, deposit_id)
if "error" in checks:
return make_error_response_from_dict(request, checks["error"])
headers = checks["headers"]
data = self.process_put(request, headers, collection_name, deposit_id)
error = data.get("error")
if error:
return make_error_response_from_dict(request, error)
return HttpResponse(status=status.HTTP_204_NO_CONTENT)
@abstractmethod
- def process_put(self, request, headers, collection_name, deposit_id):
+ def process_put(
+ self, request: Request, headers: Dict, collection_name: str, deposit_id: int
+ ) -> Dict[str, Any]:
"""Routine to deal with updating a deposit in some way.
Returns
dictionary of the processing result
"""
pass
class APIDelete(APIBase, metaclass=ABCMeta):
"""Mixin for class to support DELETE method.
"""
- def delete(self, request, collection_name, deposit_id):
+ def delete(
+ self, request: Request, collection_name: str, deposit_id: Optional[int] = None
+ ) -> HttpResponse:
"""Endpoint to delete some deposit's resources (archives, deposit).
Returns:
204 response when no error during routine occurred.
400 if the deposit does not belong to the collection
404 if the deposit or the collection does not exist
"""
checks = self.checks(request, collection_name, deposit_id)
if "error" in checks:
return make_error_response_from_dict(request, checks["error"])
+ assert deposit_id is not None
data = self.process_delete(request, collection_name, deposit_id)
error = data.get("error")
if error:
return make_error_response_from_dict(request, error)
return HttpResponse(status=status.HTTP_204_NO_CONTENT)
@abstractmethod
- def process_delete(self, request, collection_name, deposit_id):
+ def process_delete(
+ self, request: Request, collection_name: str, deposit_id: int
+ ) -> Dict:
"""Routine to delete a resource.
This is mostly not allowed except for the
EM_IRI (cf. .api.deposit_update.APIUpdateArchive)
"""
- pass
+ return {}
diff --git a/swh/deposit/api/deposit.py b/swh/deposit/api/deposit.py
index b426b180..8cc4455c 100644
--- a/swh/deposit/api/deposit.py
+++ b/swh/deposit/api/deposit.py
@@ -1,98 +1,112 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Any, Dict, Optional, Tuple
+
from rest_framework import status
from ..config import EDIT_SE_IRI
from ..errors import BAD_REQUEST, make_error_dict
from ..parsers import (
SWHAtomEntryParser,
SWHFileUploadTarParser,
SWHFileUploadZipParser,
SWHMultiPartParser,
)
from .common import ACCEPT_ARCHIVE_CONTENT_TYPES, APIPost
class APIPostDeposit(APIPost):
"""Deposit request class defining api endpoints for sword deposit.
What's known as 'Col IRI' in the sword specification.
HTTP verbs supported: POST
"""
parser_classes = (
SWHMultiPartParser,
SWHFileUploadZipParser,
SWHFileUploadTarParser,
SWHAtomEntryParser,
)
- def additional_checks(self, req, headers, collection_name, deposit_id=None):
+ def additional_checks(
+ self,
+ req,
+ headers: Dict[str, Any],
+ collection_name: str,
+ deposit_id: Optional[int] = None,
+ ) -> Dict[str, Any]:
slug = headers["slug"]
if not slug:
msg = "Missing SLUG header in request"
verbose_description = "Provide in the SLUG header one identifier, for example the url pointing to the resource you are depositing." # noqa
return make_error_dict(BAD_REQUEST, msg, verbose_description)
return {}
- def process_post(self, req, headers, collection_name, deposit_id=None):
+ def process_post(
+ self,
+ req,
+ headers: Dict[str, Any],
+ collection_name: str,
+ deposit_id: Optional[int] = None,
+ ) -> Tuple[int, str, Dict[str, Any]]:
"""Create a first deposit as:
- archive deposit (1 zip)
- multipart (1 zip + 1 atom entry)
- atom entry
Args:
req (Request): the request holding the information to parse
and inject in db
collection_name (str): the associated client
Returns:
An http response (HttpResponse) according to the situation.
If everything is ok, a 201 response (created) with a
deposit receipt.
Otherwise, depending on the upload, the following errors
can be returned:
- archive deposit:
- 400 (bad request) if the request is not providing an external
identifier
- 403 (forbidden) if the length of the archive exceeds the
max size configured
- 412 (precondition failed) if the length or hash provided
mismatch the reality of the archive.
- 415 (unsupported media type) if a wrong media type is
provided
- multipart deposit:
- 400 (bad request) if the request is not providing an external
identifier
- 412 (precondition failed) if the potentially md5 hash
provided mismatch the reality of the archive
- 415 (unsupported media type) if a wrong media type is
provided
- Atom entry deposit:
- 400 (bad request) if the request is not providing an external
identifier
- 400 (bad request) if the request's body is empty
- 415 (unsupported media type) if a wrong media type is
provided
"""
assert deposit_id is None
if req.content_type in ACCEPT_ARCHIVE_CONTENT_TYPES:
data = self._binary_upload(req, headers, collection_name)
elif req.content_type.startswith("multipart/"):
data = self._multipart_upload(req, headers, collection_name)
else:
data = self._atom_entry(req, headers, collection_name)
return status.HTTP_201_CREATED, EDIT_SE_IRI, data
diff --git a/swh/deposit/api/deposit_content.py b/swh/deposit/api/deposit_content.py
index a7f861f4..fbab2fe4 100644
--- a/swh/deposit/api/deposit_content.py
+++ b/swh/deposit/api/deposit_content.py
@@ -1,46 +1,47 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from django.http import HttpResponse
from django.shortcuts import render
from rest_framework import status
from ..errors import NOT_FOUND, make_error_response, make_error_response_from_dict
from ..models import DEPOSIT_STATUS_DETAIL, Deposit, DepositRequest
from .common import APIBase
class APIContent(APIBase):
- def get(self, req, collection_name, deposit_id, format=None):
+ def get(self, req, collection_name: str, deposit_id: int) -> HttpResponse:
checks = self.checks(req, collection_name, deposit_id)
if "error" in checks:
return make_error_response_from_dict(req, checks["error"])
try:
deposit = Deposit.objects.get(pk=deposit_id)
if deposit.collection.name != collection_name:
raise Deposit.DoesNotExist
except Deposit.DoesNotExist:
return make_error_response(
req,
NOT_FOUND,
"deposit %s does not belong to collection %s"
% (deposit_id, collection_name),
)
requests = DepositRequest.objects.filter(deposit=deposit)
context = {
"deposit_id": deposit.id,
"status": deposit.status,
"status_detail": DEPOSIT_STATUS_DETAIL[deposit.status],
"requests": requests,
}
return render(
req,
"deposit/content.xml",
context=context,
content_type="application/xml",
status=status.HTTP_200_OK,
)
diff --git a/swh/deposit/api/deposit_status.py b/swh/deposit/api/deposit_status.py
index fa89276e..9c87db9c 100644
--- a/swh/deposit/api/deposit_status.py
+++ b/swh/deposit/api/deposit_status.py
@@ -1,64 +1,65 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from django.http import HttpResponse
from django.shortcuts import render
from rest_framework import status
from ..errors import NOT_FOUND, make_error_response, make_error_response_from_dict
from ..models import DEPOSIT_STATUS_DETAIL, Deposit
from .common import APIBase
from .converters import convert_status_detail
class APIStatus(APIBase):
"""Deposit status.
What's known as 'State IRI' in the sword specification.
HTTP verbs supported: GET
"""
- def get(self, req, collection_name, deposit_id, format=None):
+ def get(self, req, collection_name: str, deposit_id: int) -> HttpResponse:
checks = self.checks(req, collection_name, deposit_id)
if "error" in checks:
return make_error_response_from_dict(req, checks["error"])
try:
deposit = Deposit.objects.get(pk=deposit_id)
if deposit.collection.name != collection_name:
raise Deposit.DoesNotExist
except Deposit.DoesNotExist:
return make_error_response(
req,
NOT_FOUND,
"deposit %s does not belong to collection %s"
% (deposit_id, collection_name),
)
status_detail = convert_status_detail(deposit.status_detail)
if not status_detail:
status_detail = DEPOSIT_STATUS_DETAIL[deposit.status]
context = {
"deposit_id": deposit.id,
"status_detail": status_detail,
}
keys = (
"status",
"swh_id",
"swh_id_context",
"external_id",
)
for k in keys:
context[k] = getattr(deposit, k, None)
return render(
req,
"deposit/status.xml",
context=context,
content_type="application/xml",
status=status.HTTP_200_OK,
)
diff --git a/swh/deposit/api/deposit_update.py b/swh/deposit/api/deposit_update.py
index 749edd37..ded1bf5f 100644
--- a/swh/deposit/api/deposit_update.py
+++ b/swh/deposit/api/deposit_update.py
@@ -1,169 +1,185 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Any, Dict, Optional, Tuple
+
from rest_framework import status
from ..config import CONT_FILE_IRI, EDIT_SE_IRI, EM_IRI
from ..errors import BAD_REQUEST, make_error_dict
from ..parsers import (
SWHAtomEntryParser,
SWHFileUploadTarParser,
SWHFileUploadZipParser,
SWHMultiPartParser,
)
from .common import ACCEPT_ARCHIVE_CONTENT_TYPES, APIDelete, APIPost, APIPut
class APIUpdateArchive(APIPost, APIPut, APIDelete):
"""Deposit request class defining api endpoints for sword deposit.
What's known as 'EM IRI' in the sword specification.
HTTP verbs supported: PUT, POST, DELETE
"""
parser_classes = (
SWHFileUploadZipParser,
SWHFileUploadTarParser,
)
- def process_put(self, req, headers, collection_name, deposit_id):
+ def process_put(
+ self, req, headers, collection_name: str, deposit_id: int
+ ) -> Dict[str, Any]:
"""Replace existing content for the existing deposit.
source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_binary # noqa
Returns:
204 No content
"""
if req.content_type not in ACCEPT_ARCHIVE_CONTENT_TYPES:
msg = "Packaging format supported is restricted to %s" % (
", ".join(ACCEPT_ARCHIVE_CONTENT_TYPES)
)
return make_error_dict(BAD_REQUEST, msg)
return self._binary_upload(
req, headers, collection_name, deposit_id=deposit_id, replace_archives=True
)
- def process_post(self, req, headers, collection_name, deposit_id):
+ def process_post(
+ self, req, headers: Dict, collection_name: str, deposit_id: Optional[int] = None
+ ) -> Tuple[int, str, Dict]:
"""Add new content to the existing deposit.
source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_mediaresource # noqa
Returns:
201 Created
Headers: Location: [Cont-File-IRI]
Body: [optional Deposit Receipt]
"""
if req.content_type not in ACCEPT_ARCHIVE_CONTENT_TYPES:
msg = "Packaging format supported is restricted to %s" % (
", ".join(ACCEPT_ARCHIVE_CONTENT_TYPES)
)
- return "unused", "unused", make_error_dict(BAD_REQUEST, msg)
+ unused = 0
+ return unused, "unused", make_error_dict(BAD_REQUEST, msg)
return (
status.HTTP_201_CREATED,
CONT_FILE_IRI,
self._binary_upload(req, headers, collection_name, deposit_id),
)
- def process_delete(self, req, collection_name, deposit_id):
+ def process_delete(self, req, collection_name: str, deposit_id: int) -> Dict:
"""Delete content (archives) from existing deposit.
source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_deletingcontent # noqa
Returns:
204 No Content
"""
return self._delete_archives(collection_name, deposit_id)
class APIUpdateMetadata(APIPost, APIPut, APIDelete):
"""Deposit request class defining api endpoints for sword deposit.
What's known as 'Edit IRI' (and SE IRI) in the sword specification.
HTTP verbs supported: POST (SE IRI), PUT (Edit IRI), DELETE
"""
parser_classes = (SWHMultiPartParser, SWHAtomEntryParser)
- def process_put(self, req, headers, collection_name, deposit_id):
+ def process_put(
+ self, req, headers: Dict, collection_name: str, deposit_id: int
+ ) -> Dict[str, Any]:
"""Replace existing deposit's metadata/archive with new ones.
source:
- http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_metadata # noqa
- http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_multipart # noqa
Returns:
204 No content
"""
if req.content_type.startswith("multipart/"):
return self._multipart_upload(
req,
headers,
collection_name,
deposit_id=deposit_id,
replace_archives=True,
replace_metadata=True,
)
return self._atom_entry(
req, headers, collection_name, deposit_id=deposit_id, replace_metadata=True
)
- def process_post(self, req, headers, collection_name, deposit_id):
+ def process_post(
+ self,
+ request,
+ headers: Dict,
+ collection_name: str,
+ deposit_id: Optional[int] = None,
+ ) -> Tuple[int, str, Dict]:
"""Add new metadata/archive to existing deposit.
source:
- http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_metadata # noqa
- http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_multipart # noqa
This also deals with an empty post corner case to finalize a
deposit.
Returns:
In optimal case for a multipart and atom-entry update, a
201 Created response. The body response will hold a
deposit. And the response headers will contain an entry
'Location' with the EM-IRI.
For the empty post case, this returns a 200.
"""
- if req.content_type.startswith("multipart/"):
+ assert deposit_id is not None
+ if request.content_type.startswith("multipart/"):
return (
status.HTTP_201_CREATED,
EM_IRI,
self._multipart_upload(
- req, headers, collection_name, deposit_id=deposit_id
+ request, headers, collection_name, deposit_id=deposit_id
),
)
# check for final empty post
# source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html
# #continueddeposit_complete
if headers["content-length"] == 0 and headers["in-progress"] is False:
- data = self._empty_post(req, headers, collection_name, deposit_id)
+ data = self._empty_post(request, headers, collection_name, deposit_id)
return (status.HTTP_200_OK, EDIT_SE_IRI, data)
return (
status.HTTP_201_CREATED,
EM_IRI,
- self._atom_entry(req, headers, collection_name, deposit_id=deposit_id),
+ self._atom_entry(request, headers, collection_name, deposit_id=deposit_id),
)
- def process_delete(self, req, collection_name, deposit_id):
+ def process_delete(self, req, collection_name: str, deposit_id: int) -> Dict:
"""Delete the container (deposit).
source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_deleteconteiner # noqa
"""
return self._delete_deposit(collection_name, deposit_id)
diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py
index e9b98ee3..4a9aaaa8 100644
--- a/swh/deposit/api/private/__init__.py
+++ b/swh/deposit/api/private/__init__.py
@@ -1,108 +1,96 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from rest_framework.permissions import AllowAny
from swh.deposit import utils
from swh.deposit.api.common import AuthenticatedAPIView
from swh.deposit.errors import NOT_FOUND, make_error_dict
from ...config import METADATA_TYPE, APIConfig
from ...models import Deposit, DepositRequest
class DepositReadMixin:
"""Deposit Read mixin
"""
def _deposit_requests(self, deposit, request_type):
"""Given a deposit, yields its associated deposit_request
Args:
deposit (Deposit): Deposit to list requests for
request_type (str): 'archive' or 'metadata'
Yields:
deposit requests of type request_type associated to the deposit
"""
if isinstance(deposit, int):
deposit = Deposit.objects.get(pk=deposit)
deposit_requests = DepositRequest.objects.filter(
type=request_type, deposit=deposit
).order_by("id")
for deposit_request in deposit_requests:
yield deposit_request
def _metadata_get(self, deposit):
"""Given a deposit, aggregate all metadata requests.
Args:
deposit (Deposit): The deposit instance to extract
metadata from.
Returns:
metadata dict from the deposit.
"""
metadata = (
m.metadata
for m in self._deposit_requests(deposit, request_type=METADATA_TYPE)
)
return utils.merge(*metadata)
class APIPrivateView(APIConfig, AuthenticatedAPIView):
"""Mixin intended as private api (so no authentication) based API view
(for the private ones).
"""
authentication_classes = ()
permission_classes = (AllowAny,)
def checks(self, req, collection_name, deposit_id=None):
"""Override default checks implementation to allow empty collection.
"""
if deposit_id:
try:
Deposit.objects.get(pk=deposit_id)
except Deposit.DoesNotExist:
return make_error_dict(
NOT_FOUND, "Deposit with id %s does not exist" % deposit_id
)
headers = self._read_headers(req)
checks = self.additional_checks(req, headers, collection_name, deposit_id)
if "error" in checks:
return checks
return {"headers": headers}
def get(
- self,
- request,
- collection_name=None,
- deposit_id=None,
- format=None,
- *args,
- **kwargs,
+ self, request, collection_name=None, deposit_id=None, *args, **kwargs,
):
- return super().get(request, collection_name, deposit_id, format)
+ return super().get(request, collection_name, deposit_id)
def put(
- self,
- request,
- collection_name=None,
- deposit_id=None,
- format=None,
- *args,
- **kwargs,
+ self, request, collection_name=None, deposit_id=None, *args, **kwargs,
):
- return super().put(request, collection_name, deposit_id, format)
+ return super().put(request, collection_name, deposit_id)
diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py
index 680ec83c..d2afd5e7 100644
--- a/swh/deposit/api/private/deposit_check.py
+++ b/swh/deposit/api/private/deposit_check.py
@@ -1,228 +1,234 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from itertools import chain
-import json
import re
from shutil import get_unpack_formats
import tarfile
+from typing import Dict, Optional, Tuple
import zipfile
from rest_framework import status
from swh.scheduler.utils import create_oneshot_task_dict
from . import APIPrivateView, DepositReadMixin
from ...config import ARCHIVE_TYPE, DEPOSIT_STATUS_REJECTED, DEPOSIT_STATUS_VERIFIED
-from ...models import Deposit
+from ...models import Deposit, DepositRequest
from ..common import APIGet
MANDATORY_FIELDS_MISSING = "Mandatory fields are missing"
ALTERNATE_FIELDS_MISSING = "Mandatory alternate fields are missing"
MANDATORY_ARCHIVE_UNREADABLE = (
"At least one of its associated archives is not readable" # noqa
)
MANDATORY_ARCHIVE_INVALID = (
"Mandatory archive is invalid (i.e contains only one archive)" # noqa
)
MANDATORY_ARCHIVE_UNSUPPORTED = "Mandatory archive type is not supported"
MANDATORY_ARCHIVE_MISSING = "Deposit without archive is rejected"
ARCHIVE_EXTENSIONS = [
"zip",
"tar",
"tar.gz",
"xz",
"tar.xz",
"bz2",
"tar.bz2",
"Z",
"tar.Z",
"tgz",
"7z",
]
PATTERN_ARCHIVE_EXTENSION = re.compile(r".*\.(%s)$" % "|".join(ARCHIVE_EXTENSIONS))
def known_archive_format(filename):
return any(
filename.endswith(t) for t in chain(*(x[1] for x in get_unpack_formats()))
)
class APIChecks(APIPrivateView, APIGet, DepositReadMixin):
"""Dedicated class to read a deposit's raw archives content.
Only GET is supported.
"""
- def _check_deposit_archives(self, deposit):
+ def _check_deposit_archives(self, deposit: Deposit) -> Tuple[bool, Optional[Dict]]:
"""Given a deposit, check each deposit request of type archive.
Args:
deposit: The deposit to check archives for
Returns
tuple (status, error_detail): True, None if all archives
are ok, (False, <detailed-error>) otherwise.
"""
requests = list(self._deposit_requests(deposit, request_type=ARCHIVE_TYPE))
if len(requests) == 0: # no associated archive is refused
return False, {"archive": [{"summary": MANDATORY_ARCHIVE_MISSING,}]}
errors = []
for archive_request in requests:
check, error_message = self._check_archive(archive_request)
if not check:
errors.append(
{"summary": error_message, "fields": [archive_request.id]}
)
if not errors:
return True, None
return False, {"archive": errors}
- def _check_archive(self, archive_request):
+ def _check_archive(
+ self, archive_request: DepositRequest
+ ) -> Tuple[bool, Optional[str]]:
"""Check that a deposit associated archive is ok:
- readable
- supported archive format
- valid content: the archive does not contain a single archive file
If any of those checks are not ok, return the corresponding
failing check.
Args:
archive_request (DepositRequest): Archive to check
Returns:
(True, None) if archive is check compliant, (False,
<detail-error>) otherwise.
"""
archive_path = archive_request.archive.path
if not known_archive_format(archive_path):
return False, MANDATORY_ARCHIVE_UNSUPPORTED
try:
if zipfile.is_zipfile(archive_path):
- with zipfile.ZipFile(archive_path) as f:
- files = f.namelist()
+ with zipfile.ZipFile(archive_path) as zipfile_:
+ files = zipfile_.namelist()
elif tarfile.is_tarfile(archive_path):
- with tarfile.open(archive_path) as f:
- files = f.getnames()
+ with tarfile.open(archive_path) as tarfile_:
+ files = tarfile_.getnames()
else:
return False, MANDATORY_ARCHIVE_UNSUPPORTED
except Exception:
return False, MANDATORY_ARCHIVE_UNREADABLE
if len(files) > 1:
return True, None
element = files[0]
if PATTERN_ARCHIVE_EXTENSION.match(element):
# archive in archive!
return False, MANDATORY_ARCHIVE_INVALID
return True, None
- def _check_metadata(self, metadata):
+ def _check_metadata(self, metadata: Dict) -> Tuple[bool, Optional[Dict]]:
"""Check to execute on all metadata for mandatory field presence.
Args:
metadata (dict): Metadata dictionary to check for mandatory fields
Returns:
tuple (status, error_detail): True, None if metadata are
ok (False, <detailed-error>) otherwise.
"""
required_fields = {
"author": False,
}
alternate_fields = {
("name", "title"): False, # alternate field, at least one
# of them must be present
}
for field, value in metadata.items():
for name in required_fields:
if name in field:
required_fields[name] = True
for possible_names in alternate_fields:
for possible_name in possible_names:
if possible_name in field:
alternate_fields[possible_names] = True
continue
mandatory_result = [k for k, v in required_fields.items() if not v]
optional_result = [" or ".join(k) for k, v in alternate_fields.items() if not v]
if mandatory_result == [] and optional_result == []:
return True, None
detail = []
if mandatory_result != []:
detail.append(
{"summary": MANDATORY_FIELDS_MISSING, "fields": mandatory_result}
)
if optional_result != []:
detail.append(
{"summary": ALTERNATE_FIELDS_MISSING, "fields": optional_result,}
)
return False, {"metadata": detail}
- def process_get(self, req, collection_name, deposit_id):
+ def process_get(
+ self, req, collection_name: str, deposit_id: int
+ ) -> Tuple[int, Dict, str]:
"""Build a unique tarball from the multiple received and stream that
content to the client.
Args:
req (Request):
collection_name (str): Collection owning the deposit
deposit_id (int): Deposit concerned by the reading
Returns:
Tuple status, stream of content, content-type
"""
deposit = Deposit.objects.get(pk=deposit_id)
metadata = self._metadata_get(deposit)
- problems = {}
+ problems: Dict = {}
# will check each deposit's associated request (both of type
# archive and metadata) for errors
archives_status, error_detail = self._check_deposit_archives(deposit)
if not archives_status:
+ assert error_detail is not None
problems.update(error_detail)
metadata_status, error_detail = self._check_metadata(metadata)
if not metadata_status:
+ assert error_detail is not None
problems.update(error_detail)
deposit_status = archives_status and metadata_status
# if any problems arose, the deposit is rejected
if not deposit_status:
deposit.status = DEPOSIT_STATUS_REJECTED
deposit.status_detail = problems
response = {
"status": deposit.status,
"details": deposit.status_detail,
}
else:
deposit.status = DEPOSIT_STATUS_VERIFIED
response = {
"status": deposit.status,
}
if not deposit.load_task_id and self.config["checks"]:
url = deposit.origin_url
task = create_oneshot_task_dict(
"load-deposit", url=url, deposit_id=deposit.id, retries_left=3
)
load_task_id = self.scheduler.create_tasks([task])[0]["id"]
deposit.load_task_id = load_task_id
deposit.save()
- return status.HTTP_200_OK, json.dumps(response), "application/json"
+ return status.HTTP_200_OK, response, "application/json"
diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
index 4a5f388a..51b6636e 100644
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -1,197 +1,195 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from contextlib import contextmanager
-import json
import os
import shutil
import tempfile
+from typing import Any, Dict, Tuple
-from django.http import FileResponse
from rest_framework import status
from swh.core import tarball
from swh.deposit.api import __version__
from swh.deposit.utils import normalize_date
from swh.model import identifiers
from . import APIPrivateView, DepositReadMixin
from ...config import ARCHIVE_TYPE, SWH_PERSON
from ...models import Deposit
from ..common import APIGet
@contextmanager
def aggregate_tarballs(extraction_dir, archive_paths):
"""Aggregate multiple tarballs into one and returns this new archive's
path.
Args:
extraction_dir (path): Path to use for the tarballs computation
archive_paths ([str]): Deposit's archive paths
Returns:
Tuple (directory to clean up, archive path (aggregated or not))
"""
# rebuild one zip archive from (possibly) multiple ones
os.makedirs(extraction_dir, 0o755, exist_ok=True)
dir_path = tempfile.mkdtemp(prefix="swh.deposit-", dir=extraction_dir)
# root folder to build an aggregated tarball
aggregated_tarball_rootdir = os.path.join(dir_path, "aggregate")
os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True)
# uncompress in a temporary location all archives
for archive_path in archive_paths:
tarball.uncompress(archive_path, aggregated_tarball_rootdir)
# Aggregate into one big tarball the multiple smaller ones
temp_tarpath = shutil.make_archive(
aggregated_tarball_rootdir, "zip", aggregated_tarball_rootdir
)
# can already clean up temporary directory
shutil.rmtree(aggregated_tarball_rootdir)
try:
yield temp_tarpath
finally:
shutil.rmtree(dir_path)
class APIReadArchives(APIPrivateView, APIGet, DepositReadMixin):
"""Dedicated class to read a deposit's raw archives content.
Only GET is supported.
"""
def __init__(self):
super().__init__()
self.extraction_dir = self.config["extraction_dir"]
if not os.path.exists(self.extraction_dir):
os.makedirs(self.extraction_dir)
- def process_get(self, request, collection_name, deposit_id):
+ def process_get(
+ self, request, collection_name: str, deposit_id: int
+ ) -> Tuple[int, Any, str]:
"""Build a unique tarball from the multiple received and stream that
content to the client.
Args:
request (Request):
- collection_name (str): Collection owning the deposit
- deposit_id (id): Deposit concerned by the reading
+ collection_name: Collection owning the deposit
+ deposit_id: Deposit concerned by the reading
Returns:
Tuple status, stream of content, content-type
"""
archive_paths = [
r.archive.path
for r in self._deposit_requests(deposit_id, request_type=ARCHIVE_TYPE)
]
- with aggregate_tarballs(self.extraction_dir, archive_paths) as path:
- return FileResponse(
- open(path, "rb"),
- status=status.HTTP_200_OK,
- content_type="application/zip",
- )
+ return (
+ status.HTTP_200_OK,
+ aggregate_tarballs(self.extraction_dir, archive_paths),
+ "swh/generator",
+ )
class APIReadMetadata(APIPrivateView, APIGet, DepositReadMixin):
"""Class in charge of aggregating metadata on a deposit.
"""
def __init__(self):
super().__init__()
self.provider = self.config["provider"]
self.tool = {
"name": "swh-deposit",
"version": __version__,
"configuration": {"sword_version": "2"},
}
def _normalize_dates(self, deposit, metadata):
"""Normalize the date to use as a tuple of author date, committer date
from the incoming metadata.
Args:
deposit (Deposit): Deposit model representation
metadata (Dict): Metadata dict representation
Returns:
Tuple of author date, committer date. Those dates are
swh normalized.
"""
commit_date = metadata.get("codemeta:datePublished")
author_date = metadata.get("codemeta:dateCreated")
if author_date and commit_date:
pass
elif commit_date:
author_date = commit_date
elif author_date:
commit_date = author_date
else:
author_date = deposit.complete_date
commit_date = deposit.complete_date
return (normalize_date(author_date), normalize_date(commit_date))
def metadata_read(self, deposit):
"""Read and aggregate multiple data on deposit into one unified data
dictionary.
Args:
deposit (Deposit): Deposit concerned by the data aggregation.
Returns:
Dictionary of data representing the deposit to inject in swh.
"""
metadata = self._metadata_get(deposit)
# Read information metadata
data = {"origin": {"type": "deposit", "url": deposit.origin_url,}}
# metadata provider
self.provider["provider_name"] = deposit.client.last_name
self.provider["provider_url"] = deposit.client.provider_url
author_date, commit_date = self._normalize_dates(deposit, metadata)
if deposit.parent:
swh_persistent_id = deposit.parent.swh_id
swhid = identifiers.parse_swhid(swh_persistent_id)
parent_revision = swhid.object_id
parents = [parent_revision]
else:
parents = []
data["origin_metadata"] = {
"provider": self.provider,
"tool": self.tool,
"metadata": metadata,
}
data["deposit"] = {
"id": deposit.id,
"client": deposit.client.username,
"collection": deposit.collection.name,
"author": SWH_PERSON,
"author_date": author_date,
"committer": SWH_PERSON,
"committer_date": commit_date,
"revision_parents": parents,
}
return data
- def process_get(self, request, collection_name, deposit_id):
+ def process_get(
+ self, request, collection_name: str, deposit_id: int
+ ) -> Tuple[int, Dict, str]:
deposit = Deposit.objects.get(pk=deposit_id)
data = self.metadata_read(deposit)
- d = {}
- if data:
- d = json.dumps(data)
-
- return status.HTTP_200_OK, d, "application/json"
+ return status.HTTP_200_OK, data if data else {}, "application/json"
diff --git a/swh/deposit/api/private/deposit_update_status.py b/swh/deposit/api/private/deposit_update_status.py
index 9df47390..af6bcb6c 100644
--- a/swh/deposit/api/private/deposit_update_status.py
+++ b/swh/deposit/api/private/deposit_update_status.py
@@ -1,103 +1,107 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Dict
+
from rest_framework.parsers import JSONParser
from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, swhid
from . import APIPrivateView
from ...errors import BAD_REQUEST, make_error_dict
from ...models import DEPOSIT_STATUS_DETAIL, DEPOSIT_STATUS_LOAD_SUCCESS, Deposit
from ..common import APIPut
MANDATORY_KEYS = ["origin_url", "revision_id", "directory_id", "snapshot_id"]
class APIUpdateStatus(APIPrivateView, APIPut):
    """Deposit request class to update the deposit's status.

    HTTP verbs supported: PUT

    """

    parser_classes = (JSONParser,)

    def additional_checks(self, request, headers, collection_name, deposit_id=None):
        """Enrich existing checks to the default ones.

        New checks:
        - Ensure the status is provided
        - Ensure it exists
        - no missing information on load success update

        Returns:
            An error dict (BAD_REQUEST) when a check fails, an empty dict
            otherwise.

        """
        data = request.data
        status = data.get("status")
        if not status:
            msg = "The status key is mandatory with possible values %s" % list(
                DEPOSIT_STATUS_DETAIL.keys()
            )
            return make_error_dict(BAD_REQUEST, msg)

        if status not in DEPOSIT_STATUS_DETAIL:
            msg = "Possible status in %s" % list(DEPOSIT_STATUS_DETAIL.keys())
            return make_error_dict(BAD_REQUEST, msg)

        if status == DEPOSIT_STATUS_LOAD_SUCCESS:
            # A successful load must carry all referencing information
            # (origin url + resulting swhids), otherwise the deposit
            # cannot be linked to the archived objects.
            missing_keys = [key for key in MANDATORY_KEYS if data.get(key) is None]
            if missing_keys:
                msg = (
                    f"Updating deposit status to {status}"
                    f" requires information {','.join(missing_keys)}"
                )
                return make_error_dict(BAD_REQUEST, msg)

        return {}

    def process_put(
        self, request, headers: Dict, collection_name: str, deposit_id: int
    ) -> Dict:
        """Update the deposit with status and SWHIDs

        Returns:
            204 No content
            400 Bad request if checks fail

        """
        data = request.data
        deposit = Deposit.objects.get(pk=deposit_id)

        # The status is always recorded; extra identifiers are only
        # computed on a successful load.  (A previously redundant
        # re-assignment of deposit.status in the rejected branch was
        # removed — the assignment below already covers every status.)
        deposit.status = data["status"]
        if deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS:
            origin_url = data["origin_url"]
            directory_id = data["directory_id"]
            revision_id = data["revision_id"]

            dir_id = swhid(DIRECTORY, directory_id)
            snp_id = swhid(SNAPSHOT, data["snapshot_id"])
            rev_id = swhid(REVISION, revision_id)

            deposit.swh_id = dir_id
            # new id with contextual information
            deposit.swh_id_context = swhid(
                DIRECTORY,
                directory_id,
                metadata={
                    "origin": origin_url,
                    "visit": snp_id,
                    "anchor": rev_id,
                    "path": "/",
                },
            )

        deposit.save()
        return {}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Aug 18, 11:14 PM (2 w, 3 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3298293
Attached To
rDDEP Push deposit
Event Timeline
Log In to Comment