Changeset View
Changeset View
Standalone View
Standalone View
swh/deposit/api/common.py
# Copyright (C) 2017-2020 The Software Heritage developers | # Copyright (C) 2017-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from abc import ABCMeta, abstractmethod | from abc import ABCMeta, abstractmethod | ||||
import datetime | import datetime | ||||
import hashlib | import hashlib | ||||
import json | import json | ||||
from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union | from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union | ||||
import uuid | |||||
import attr | import attr | ||||
from django.core.files.uploadedfile import UploadedFile | from django.core.files.uploadedfile import UploadedFile | ||||
from django.http import FileResponse, HttpResponse | from django.http import FileResponse, HttpResponse | ||||
from django.shortcuts import render | from django.shortcuts import render | ||||
from django.urls import reverse | from django.urls import reverse | ||||
from django.utils import timezone | from django.utils import timezone | ||||
from rest_framework import status | from rest_framework import status | ||||
Show All 39 Lines | from ..errors import ( | ||||
FORBIDDEN, | FORBIDDEN, | ||||
MAX_UPLOAD_SIZE_EXCEEDED, | MAX_UPLOAD_SIZE_EXCEEDED, | ||||
MEDIATION_NOT_ALLOWED, | MEDIATION_NOT_ALLOWED, | ||||
METHOD_NOT_ALLOWED, | METHOD_NOT_ALLOWED, | ||||
NOT_FOUND, | NOT_FOUND, | ||||
PARSING_ERROR, | PARSING_ERROR, | ||||
DepositError, | DepositError, | ||||
ParserError, | ParserError, | ||||
raise_missing_slug_error, | |||||
) | ) | ||||
from ..models import DepositClient, DepositCollection, DepositRequest | from ..models import DepositClient, DepositCollection, DepositRequest | ||||
from ..parsers import parse_swh_reference, parse_xml | from ..parsers import parse_swh_reference, parse_xml | ||||
ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"] | ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"] | ||||
ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"] | ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"] | ||||
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines | def get_collection_by_name(collection_name: str): | ||||
assert collection is not None | assert collection is not None | ||||
return collection | return collection | ||||
def guess_deposit_origin_url(deposit: "Deposit") -> str:
    """Guess an origin url for the given deposit.

    The url is the client's provider url (with trailing slashes removed)
    joined by a single "/" with the deposit's external id. When the deposit
    has no external id (None or empty), a random UUID4 string is used in its
    place.

    Args:
        deposit: the deposit to compute an origin url for; its
            ``external_id`` and ``client.provider_url`` attributes are read.

    Returns:
        The guessed origin url, ``<provider_url>/<external_id or random uuid>``.

    """
    external_id = deposit.external_id
    if not external_id:
        # The client provided neither an origin_url nor a slug. That's inconvenient,
        # but SWORD requires we support it. So let's generate a random slug.
        external_id = str(uuid.uuid4())
    return "%s/%s" % (deposit.client.provider_url.rstrip("/"), external_id)
class AuthenticatedAPIView(APIView): | class AuthenticatedAPIView(APIView): | ||||
"""Mixin intended as a based API view to enforce the basic | """Mixin intended as a based API view to enforce the basic | ||||
authentication check | authentication check | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | class APIBase(APIConfig, AuthenticatedAPIView, metaclass=ABCMeta): | ||||
def _complete_deposit(self, deposit: Deposit) -> None: | def _complete_deposit(self, deposit: Deposit) -> None: | ||||
"""Marks the deposit as 'deposited', then schedule a check task if configured | """Marks the deposit as 'deposited', then schedule a check task if configured | ||||
to do so.""" | to do so.""" | ||||
deposit.complete_date = timezone.now() | deposit.complete_date = timezone.now() | ||||
deposit.status = DEPOSIT_STATUS_DEPOSITED | deposit.status = DEPOSIT_STATUS_DEPOSITED | ||||
deposit.save() | deposit.save() | ||||
if deposit.external_id and not deposit.origin_url: | if not deposit.origin_url: | ||||
deposit.origin_url = guess_deposit_origin_url(deposit) | deposit.origin_url = guess_deposit_origin_url(deposit) | ||||
if self.config["checks"]: | if self.config["checks"]: | ||||
scheduler = self.scheduler | scheduler = self.scheduler | ||||
if deposit.status == DEPOSIT_STATUS_DEPOSITED and not deposit.check_task_id: | if deposit.status == DEPOSIT_STATUS_DEPOSITED and not deposit.check_task_id: | ||||
task = create_oneshot_task_dict( | task = create_oneshot_task_dict( | ||||
"check-deposit", | "check-deposit", | ||||
collection=deposit.collection.name, | collection=deposit.collection.name, | ||||
▲ Show 20 Lines • Show All 143 Lines • ▼ Show 20 Lines | class APIBase(APIConfig, AuthenticatedAPIView, metaclass=ABCMeta): | ||||
def _binary_upload( | def _binary_upload( | ||||
self, | self, | ||||
request: Request, | request: Request, | ||||
headers: ParsedRequestHeaders, | headers: ParsedRequestHeaders, | ||||
collection_name: str, | collection_name: str, | ||||
deposit: Deposit, | deposit: Deposit, | ||||
replace_metadata: bool = False, | replace_metadata: bool = False, | ||||
replace_archives: bool = False, | replace_archives: bool = False, | ||||
check_slug_is_present: bool = False, | |||||
) -> Receipt: | ) -> Receipt: | ||||
"""Binary upload routine. | """Binary upload routine. | ||||
Other than such a request, a 415 response is returned. | Other than such a request, a 415 response is returned. | ||||
Args: | Args: | ||||
request: the request holding information to parse | request: the request holding information to parse | ||||
and inject in db | and inject in db | ||||
headers: parsed request headers | headers: parsed request headers | ||||
collection_name: the associated client | collection_name: the associated client | ||||
deposit: deposit to be updated | deposit: deposit to be updated | ||||
replace_metadata: 'Update or add' request to existing | replace_metadata: 'Update or add' request to existing | ||||
deposit. If False (default), this adds new metadata request to | deposit. If False (default), this adds new metadata request to | ||||
existing ones. Otherwise, this will replace existing metadata. | existing ones. Otherwise, this will replace existing metadata. | ||||
replace_archives: 'Update or add' request to existing | replace_archives: 'Update or add' request to existing | ||||
deposit. If False (default), this adds new archive request to | deposit. If False (default), this adds new archive request to | ||||
existing ones. Otherwise, this will replace existing archives. | existing ones. Otherwise, this will replace existing archives. | ||||
ones. | ones. | ||||
check_slug_is_present: Check for the slug header if True and raise | |||||
if not present | |||||
Raises: | Raises: | ||||
- 400 (bad request) if the request is not providing an external | - 400 (bad request) if the request is not providing an external | ||||
identifier | identifier | ||||
- 413 (request entity too large) if the length of the | - 413 (request entity too large) if the length of the | ||||
archive exceeds the max size configured | archive exceeds the max size configured | ||||
- 412 (precondition failed) if the length or md5 hash provided | - 412 (precondition failed) if the length or md5 hash provided | ||||
mismatch the reality of the archive | mismatch the reality of the archive | ||||
Show All 25 Lines | ) -> Receipt: | ||||
) | ) | ||||
filehandler = request.FILES["file"] | filehandler = request.FILES["file"] | ||||
assert isinstance(filehandler, UploadedFile), filehandler | assert isinstance(filehandler, UploadedFile), filehandler | ||||
self._check_file_length(filehandler, content_length) | self._check_file_length(filehandler, content_length) | ||||
self._check_file_md5sum(filehandler, headers.content_md5sum) | self._check_file_md5sum(filehandler, headers.content_md5sum) | ||||
slug = headers.slug | |||||
if check_slug_is_present and not slug: | |||||
raise_missing_slug_error() | |||||
# actual storage of data | # actual storage of data | ||||
archive_metadata = filehandler | archive_metadata = filehandler | ||||
self._deposit_put( | self._deposit_put( | ||||
deposit=deposit, in_progress=headers.in_progress, | deposit=deposit, in_progress=headers.in_progress, | ||||
) | ) | ||||
self._deposit_request_put( | self._deposit_request_put( | ||||
deposit, | deposit, | ||||
{ARCHIVE_KEY: archive_metadata}, | {ARCHIVE_KEY: archive_metadata}, | ||||
Show All 20 Lines | class APIBase(APIConfig, AuthenticatedAPIView, metaclass=ABCMeta): | ||||
def _multipart_upload( | def _multipart_upload( | ||||
self, | self, | ||||
request: Request, | request: Request, | ||||
headers: ParsedRequestHeaders, | headers: ParsedRequestHeaders, | ||||
collection_name: str, | collection_name: str, | ||||
deposit: Deposit, | deposit: Deposit, | ||||
replace_metadata: bool = False, | replace_metadata: bool = False, | ||||
replace_archives: bool = False, | replace_archives: bool = False, | ||||
check_slug_is_present: bool = False, | |||||
) -> Receipt: | ) -> Receipt: | ||||
"""Multipart upload supported with exactly: | """Multipart upload supported with exactly: | ||||
- 1 archive (zip) | - 1 archive (zip) | ||||
- 1 atom entry | - 1 atom entry | ||||
Other than such a request, a 415 response is returned. | Other than such a request, a 415 response is returned. | ||||
Args: | Args: | ||||
request: the request holding information to parse | request: the request holding information to parse | ||||
and inject in db | and inject in db | ||||
headers: parsed request headers | headers: parsed request headers | ||||
collection_name: the associated client | collection_name: the associated client | ||||
deposit: deposit to be updated | deposit: deposit to be updated | ||||
replace_metadata: 'Update or add' request to existing | replace_metadata: 'Update or add' request to existing | ||||
deposit. If False (default), this adds new metadata request to | deposit. If False (default), this adds new metadata request to | ||||
existing ones. Otherwise, this will replace existing metadata. | existing ones. Otherwise, this will replace existing metadata. | ||||
replace_archives: 'Update or add' request to existing | replace_archives: 'Update or add' request to existing | ||||
deposit. If False (default), this adds new archive request to | deposit. If False (default), this adds new archive request to | ||||
existing ones. Otherwise, this will replace existing archives. | existing ones. Otherwise, this will replace existing archives. | ||||
ones. | ones. | ||||
check_slug_is_present: Check for the slug header if True and raise | |||||
if not present | |||||
Raises: | Raises: | ||||
- 400 (bad request) if the request is not providing an external | - 400 (bad request) if the request is not providing an external | ||||
identifier | identifier | ||||
- 412 (precondition failed) if the md5 hash potentially provided | - 412 (precondition failed) if the md5 hash potentially provided | ||||
mismatches the reality of the archive | mismatches the reality of the archive | ||||
- 413 (request entity too large) if the length of the | - 413 (request entity too large) if the length of the | ||||
archive exceeds the max size configured | archive exceeds the max size configured | ||||
- 415 (unsupported media type) if a wrong media type is provided | - 415 (unsupported media type) if a wrong media type is provided | ||||
""" | """ | ||||
slug = headers.slug | |||||
if check_slug_is_present and not slug: | |||||
raise_missing_slug_error() | |||||
content_types_present = set() | content_types_present = set() | ||||
data: Dict[str, Optional[Any]] = { | data: Dict[str, Optional[Any]] = { | ||||
"application/zip": None, # expected either zip | "application/zip": None, # expected either zip | ||||
"application/x-tar": None, # or x-tar | "application/x-tar": None, # or x-tar | ||||
"application/atom+xml": None, | "application/atom+xml": None, | ||||
} | } | ||||
for key, value in request.FILES.items(): | for key, value in request.FILES.items(): | ||||
▲ Show 20 Lines • Show All 161 Lines • ▼ Show 20 Lines | class APIBase(APIConfig, AuthenticatedAPIView, metaclass=ABCMeta): | ||||
def _atom_entry( | def _atom_entry( | ||||
self, | self, | ||||
request: Request, | request: Request, | ||||
headers: ParsedRequestHeaders, | headers: ParsedRequestHeaders, | ||||
collection_name: str, | collection_name: str, | ||||
deposit: Deposit, | deposit: Deposit, | ||||
replace_metadata: bool = False, | replace_metadata: bool = False, | ||||
replace_archives: bool = False, | replace_archives: bool = False, | ||||
check_slug_is_present: bool = False, | |||||
) -> Receipt: | ) -> Receipt: | ||||
"""Atom entry deposit. | """Atom entry deposit. | ||||
Args: | Args: | ||||
request: the request holding information to parse | request: the request holding information to parse | ||||
and inject in db | and inject in db | ||||
headers: parsed request headers | headers: parsed request headers | ||||
collection_name: the associated client | collection_name: the associated client | ||||
deposit: deposit to be updated | deposit: deposit to be updated | ||||
replace_metadata: 'Update or add' request to existing | replace_metadata: 'Update or add' request to existing | ||||
deposit. If False (default), this adds new metadata request to | deposit. If False (default), this adds new metadata request to | ||||
existing ones. Otherwise, this will replace existing metadata. | existing ones. Otherwise, this will replace existing metadata. | ||||
replace_archives: 'Update or add' request to existing | replace_archives: 'Update or add' request to existing | ||||
deposit. If False (default), this adds new archive request to | deposit. If False (default), this adds new archive request to | ||||
existing ones. Otherwise, this will replace existing archives. | existing ones. Otherwise, this will replace existing archives. | ||||
ones. | ones. | ||||
check_slug_is_present: Check for the slug header if True and raise | |||||
if not present | |||||
Raises: | Raises: | ||||
- 400 (bad request) if the request is not providing an external | - 400 (bad request) if the request is not providing an external | ||||
identifier | identifier | ||||
- 400 (bad request) if the request's body is empty | - 400 (bad request) if the request's body is empty | ||||
- 415 (unsupported media type) if a wrong media type is provided | - 415 (unsupported media type) if a wrong media type is provided | ||||
""" | """ | ||||
Show All 12 Lines | ) -> Receipt: | ||||
BAD_REQUEST, | BAD_REQUEST, | ||||
"Empty body request is not supported", | "Empty body request is not supported", | ||||
"Atom entry deposit is supposed to send for metadata. " | "Atom entry deposit is supposed to send for metadata. " | ||||
"If the body is empty, there is no metadata.", | "If the body is empty, there is no metadata.", | ||||
) | ) | ||||
if ( | if ( | ||||
"atom:external_identifier" in metadata | "atom:external_identifier" in metadata | ||||
and headers.slug | |||||
and metadata["atom:external_identifier"] != headers.slug | and metadata["atom:external_identifier"] != headers.slug | ||||
): | ): | ||||
# TODO: Once clients have stopped using it, raise this error | # TODO: Once clients have stopped using it, raise this error | ||||
# even when they are equal. | # even when they are equal. | ||||
raise DepositError( | raise DepositError( | ||||
BAD_REQUEST, | BAD_REQUEST, | ||||
"The 'external_identifier' tag is deprecated, " | "The 'external_identifier' tag is deprecated, " | ||||
"the Slug header should be used instead.", | "the Slug header should be used instead.", | ||||
) | ) | ||||
# Determine if we are in the metadata-only deposit case | # Determine if we are in the metadata-only deposit case | ||||
try: | try: | ||||
swhid = parse_swh_reference(metadata) | swhid = parse_swh_reference(metadata) | ||||
except ValidationError as e: | except ValidationError as e: | ||||
raise DepositError( | raise DepositError( | ||||
PARSING_ERROR, "Invalid SWHID reference", str(e), | PARSING_ERROR, "Invalid SWHID reference", str(e), | ||||
) | ) | ||||
if swhid is None and check_slug_is_present and not headers.slug: | |||||
raise_missing_slug_error() | |||||
self._deposit_put( | self._deposit_put( | ||||
deposit=deposit, in_progress=headers.in_progress, | deposit=deposit, in_progress=headers.in_progress, | ||||
) | ) | ||||
if swhid is not None: | if swhid is not None: | ||||
swhid, swhid_ref, depo, depo_request = self._store_metadata_deposit( | swhid, swhid_ref, depo, depo_request = self._store_metadata_deposit( | ||||
deposit, swhid, metadata, raw_metadata | deposit, swhid, metadata, raw_metadata | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 367 Lines • Show Last 20 Lines |