diff --git a/swh/deposit/__init__.py b/swh/deposit/__init__.py
index e69de29b..65c78f3d 100644
--- a/swh/deposit/__init__.py
+++ b/swh/deposit/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pkg_resources
+
+try:
+ __version__ = pkg_resources.get_distribution("swh.deposit").version
+except pkg_resources.DistributionNotFound:
+ __version__ = "devel"
diff --git a/swh/deposit/api/__init__.py b/swh/deposit/api/__init__.py
index 65c78f3d..245db776 100644
--- a/swh/deposit/api/__init__.py
+++ b/swh/deposit/api/__init__.py
@@ -1,11 +1,4 @@
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-
-import pkg_resources
-
-try:
- __version__ = pkg_resources.get_distribution("swh.deposit").version
-except pkg_resources.DistributionNotFound:
- __version__ = "devel"
diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
index 6bed49c5..38afddcf 100644
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -1,1033 +1,1041 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from abc import ABCMeta, abstractmethod
import datetime
import hashlib
import json
from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union
from django.http import FileResponse, HttpResponse
from django.shortcuts import render
from django.urls import reverse
from django.utils import timezone
from rest_framework import status
from rest_framework.authentication import BaseAuthentication, BasicAuthentication
from rest_framework.permissions import BasePermission, IsAuthenticated
from rest_framework.request import Request
from rest_framework.views import APIView
from swh.model import hashutil
from swh.scheduler.utils import create_oneshot_task_dict
from ..config import (
ARCHIVE_KEY,
ARCHIVE_TYPE,
CONT_FILE_IRI,
DEPOSIT_STATUS_DEPOSITED,
DEPOSIT_STATUS_LOAD_SUCCESS,
DEPOSIT_STATUS_PARTIAL,
EDIT_SE_IRI,
EM_IRI,
METADATA_KEY,
METADATA_TYPE,
RAW_METADATA_KEY,
STATE_IRI,
APIConfig,
)
from ..errors import (
BAD_REQUEST,
CHECKSUM_MISMATCH,
ERROR_CONTENT,
FORBIDDEN,
MAX_UPLOAD_SIZE_EXCEEDED,
MEDIATION_NOT_ALLOWED,
METHOD_NOT_ALLOWED,
NOT_FOUND,
PARSING_ERROR,
ParserError,
make_error_dict,
make_error_response,
make_error_response_from_dict,
)
from ..models import Deposit, DepositClient, DepositCollection, DepositRequest
from ..parsers import parse_xml
ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"]
ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"]
class AuthenticatedAPIView(APIView):
"""Mixin intended as a based API view to enforce the basic
authentication check
"""
authentication_classes: Sequence[Type[BaseAuthentication]] = (BasicAuthentication,)
permission_classes: Sequence[Type[BasePermission]] = (IsAuthenticated,)
class APIBase(APIConfig, AuthenticatedAPIView, metaclass=ABCMeta):
"""Base deposit request class sharing multiple common behaviors.
"""
def _read_headers(self, request: Request) -> Dict[str, Any]:
"""Read and unify the necessary headers from the request (those are
not stored in the same location or not properly formatted).
Args:
request (Request): Input request
Returns:
Dictionary with the following keys (some associated values may be
None):
- content-type
- content-length
- in-progress
- content-disposition
- packaging
- slug
- on-behalf-of
"""
meta = request._request.META
content_type = request.content_type
content_length = meta.get("CONTENT_LENGTH")
if content_length and isinstance(content_length, str):
content_length = int(content_length)
# final deposit if not provided
in_progress = meta.get("HTTP_IN_PROGRESS", False)
content_disposition = meta.get("HTTP_CONTENT_DISPOSITION")
if isinstance(in_progress, str):
in_progress = in_progress.lower() == "true"
content_md5sum = meta.get("HTTP_CONTENT_MD5")
if content_md5sum:
content_md5sum = bytes.fromhex(content_md5sum)
packaging = meta.get("HTTP_PACKAGING")
slug = meta.get("HTTP_SLUG")
on_behalf_of = meta.get("HTTP_ON_BEHALF_OF")
metadata_relevant = meta.get("HTTP_METADATA_RELEVANT")
+ swhid = meta.get("HTTP_X_CHECK_SWHID")
+
return {
"content-type": content_type,
"content-length": content_length,
"in-progress": in_progress,
"content-disposition": content_disposition,
"content-md5sum": content_md5sum,
"packaging": packaging,
"slug": slug,
"on-behalf-of": on_behalf_of,
"metadata-relevant": metadata_relevant,
+ "swhid": swhid,
}
def _compute_md5(self, filehandler) -> bytes:
"""Compute uploaded file's md5 sum.
Args:
filehandler (InMemoryUploadedFile): the file to compute the md5
hash
Returns:
the md5 checksum (str)
"""
h = hashlib.md5()
for chunk in filehandler:
h.update(chunk)
return h.digest()
def _deposit_put(
self,
request: Request,
deposit_id: Optional[int] = None,
in_progress: bool = False,
external_id: Optional[str] = None,
) -> Deposit:
"""Save/Update a deposit in db.
Args:
request: request data
deposit_id: deposit identifier
in_progress: deposit status
external_id: external identifier to associate to the deposit
Returns:
The Deposit instance saved or updated.
"""
complete_date: Optional[datetime.datetime] = None
deposit_parent: Optional[Deposit] = None
if in_progress is False:
complete_date = timezone.now()
status_type = DEPOSIT_STATUS_DEPOSITED
else:
status_type = DEPOSIT_STATUS_PARTIAL
if not deposit_id:
try:
# find a deposit parent (same external id, status load to success)
deposit_parent = (
Deposit.objects.filter(
external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS
)
.order_by("-id")[0:1]
.get()
) # noqa
except Deposit.DoesNotExist:
# then no parent for that deposit, deposit_parent already None
pass
assert external_id is not None
deposit = Deposit(
collection=self._collection,
external_id=external_id,
complete_date=complete_date,
status=status_type,
client=self._client,
parent=deposit_parent,
)
else:
deposit = Deposit.objects.get(pk=deposit_id)
# update metadata
deposit.complete_date = complete_date
deposit.status = status_type
if self.config["checks"]:
deposit.save() # needed to have a deposit id
scheduler = self.scheduler
if deposit.status == DEPOSIT_STATUS_DEPOSITED and not deposit.check_task_id:
task = create_oneshot_task_dict(
"check-deposit",
collection=deposit.collection.name,
deposit_id=deposit.id,
)
check_task_id = scheduler.create_tasks([task])[0]["id"]
deposit.check_task_id = check_task_id
deposit.save()
return deposit
def _deposit_request_put(
self,
deposit: Deposit,
deposit_request_data: Dict[str, Any],
replace_metadata: bool = False,
replace_archives: bool = False,
- ) -> None:
+ ) -> DepositRequest:
"""Save a deposit request with metadata attached to a deposit.
Args:
deposit: The deposit concerned by the request
deposit_request_data: The dictionary with at most 2 deposit
request types (archive, metadata) to associate to the deposit
replace_metadata: Flag defining if we add or update
existing metadata to the deposit
replace_archives: Flag defining if we add or update
archives to existing deposit
Returns:
- None
+ the DepositRequest object stored in the backend
"""
if replace_metadata:
DepositRequest.objects.filter(deposit=deposit, type=METADATA_TYPE).delete()
if replace_archives:
DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete()
deposit_request = None
archive_file = deposit_request_data.get(ARCHIVE_KEY)
if archive_file:
deposit_request = DepositRequest(
type=ARCHIVE_TYPE, deposit=deposit, archive=archive_file
)
deposit_request.save()
metadata = deposit_request_data.get(METADATA_KEY)
if metadata:
raw_metadata = deposit_request_data[RAW_METADATA_KEY]
deposit_request = DepositRequest(
type=METADATA_TYPE,
deposit=deposit,
metadata=metadata,
raw_metadata=raw_metadata.decode("utf-8"),
)
deposit_request.save()
assert deposit_request is not None
+ return deposit_request
def _delete_archives(self, collection_name: str, deposit_id: int) -> Dict:
"""Delete archive references from the deposit id.
"""
try:
deposit = Deposit.objects.get(pk=deposit_id)
except Deposit.DoesNotExist:
return make_error_dict(
NOT_FOUND, f"The deposit {deposit_id} does not exist"
)
DepositRequest.objects.filter(deposit=deposit, type=ARCHIVE_TYPE).delete()
return {}
def _delete_deposit(self, collection_name: str, deposit_id: int) -> Dict:
"""Delete deposit reference.
Args:
collection_name: Client's collection
deposit_id: The deposit to delete
Returns
Empty dict when ok.
Dict with error key to describe the failure.
"""
try:
deposit = Deposit.objects.get(pk=deposit_id)
except Deposit.DoesNotExist:
return make_error_dict(
NOT_FOUND, f"The deposit {deposit_id} does not exist"
)
if deposit.collection.name != collection_name:
summary = "Cannot delete a deposit from another collection"
description = "Deposit %s does not belong to the collection %s" % (
deposit_id,
collection_name,
)
return make_error_dict(
BAD_REQUEST, summary=summary, verbose_description=description
)
DepositRequest.objects.filter(deposit=deposit).delete()
deposit.delete()
return {}
def _check_preconditions_on(
self, filehandler, md5sum: str, content_length: Optional[int] = None
) -> Optional[Dict]:
"""Check preconditions on provided file are respected. That is the
length and/or the md5sum hash match the file's content.
Args:
filehandler (InMemoryUploadedFile): The file to check
md5sum: md5 hash expected from the file's content
content_length: the expected length if provided.
Returns:
Either none if no error or a dictionary with a key error
detailing the problem.
"""
max_upload_size = self.config["max_upload_size"]
if content_length:
if content_length > max_upload_size:
return make_error_dict(
MAX_UPLOAD_SIZE_EXCEEDED,
f"Upload size limit exceeded (max {max_upload_size} bytes)."
"Please consider sending the archive in multiple steps.",
)
length = filehandler.size
if length != content_length:
return make_error_dict(
status.HTTP_412_PRECONDITION_FAILED, "Wrong length"
)
if md5sum:
_md5sum = self._compute_md5(filehandler)
if _md5sum != md5sum:
return make_error_dict(
CHECKSUM_MISMATCH,
"Wrong md5 hash",
f"The checksum sent {hashutil.hash_to_hex(md5sum)} and the actual "
f"checksum {hashutil.hash_to_hex(_md5sum)} does not match.",
)
return None
def _binary_upload(
self,
request: Request,
headers: Dict[str, Any],
collection_name: str,
deposit_id: Optional[int] = None,
replace_metadata: bool = False,
replace_archives: bool = False,
) -> Dict[str, Any]:
"""Binary upload routine.
Other than such a request, a 415 response is returned.
Args:
request (Request): the request holding information to parse
and inject in db
headers (dict): request headers formatted
collection_name (str): the associated client
deposit_id (id): deposit identifier if provided
replace_metadata (bool): 'Update or add' request to existing
deposit. If False (default), this adds new metadata request to
existing ones. Otherwise, this will replace existing metadata.
replace_archives (bool): 'Update or add' request to existing
deposit. If False (default), this adds new archive request to
existing ones. Otherwise, this will replace existing archives.
ones.
Returns:
In the optimal case a dict with the following keys:
- deposit_id (int): Deposit identifier
- deposit_date (date): Deposit date
- archive: None (no archive is provided here)
Otherwise, a dictionary with the key error and the
associated failures, either:
- 400 (bad request) if the request is not providing an external
identifier
- 413 (request entity too large) if the length of the
archive exceeds the max size configured
- 412 (precondition failed) if the length or md5 hash provided
mismatch the reality of the archive
- 415 (unsupported media type) if a wrong media type is provided
"""
content_length = headers["content-length"]
if not content_length:
return make_error_dict(
BAD_REQUEST,
"CONTENT_LENGTH header is mandatory",
"For archive deposit, the CONTENT_LENGTH header must be sent.",
)
content_disposition = headers["content-disposition"]
if not content_disposition:
return make_error_dict(
BAD_REQUEST,
"CONTENT_DISPOSITION header is mandatory",
"For archive deposit, the CONTENT_DISPOSITION header must be sent.",
)
packaging = headers["packaging"]
if packaging and packaging not in ACCEPT_PACKAGINGS:
return make_error_dict(
BAD_REQUEST,
f"Only packaging {ACCEPT_PACKAGINGS} is supported",
f"The packaging provided {packaging} is not supported",
)
filehandler = request.FILES["file"]
precondition_status_response = self._check_preconditions_on(
filehandler, headers["content-md5sum"], content_length
)
if precondition_status_response:
return precondition_status_response
external_id = headers["slug"]
# actual storage of data
archive_metadata = filehandler
deposit = self._deposit_put(
request,
deposit_id=deposit_id,
in_progress=headers["in-progress"],
external_id=external_id,
)
self._deposit_request_put(
deposit,
{ARCHIVE_KEY: archive_metadata},
replace_metadata=replace_metadata,
replace_archives=replace_archives,
)
return {
"deposit_id": deposit.id,
"deposit_date": deposit.reception_date,
"status": deposit.status,
"archive": filehandler.name,
}
def _read_metadata(self, metadata_stream) -> Tuple[bytes, Dict[str, Any]]:
"""Given a metadata stream, reads the metadata and returns both the
parsed and the raw metadata.
"""
raw_metadata = metadata_stream.read()
metadata = parse_xml(raw_metadata)
return raw_metadata, metadata
def _multipart_upload(
self,
request: Request,
headers: Dict[str, Any],
collection_name: str,
deposit_id: Optional[int] = None,
replace_metadata: bool = False,
replace_archives: bool = False,
) -> Dict:
"""Multipart upload supported with exactly:
- 1 archive (zip)
- 1 atom entry
Other than such a request, a 415 response is returned.
Args:
request (Request): the request holding information to parse
and inject in db
headers: request headers formatted
collection_name: the associated client
deposit_id: deposit identifier if provided
replace_metadata: 'Update or add' request to existing
deposit. If False (default), this adds new metadata request to
existing ones. Otherwise, this will replace existing metadata.
replace_archives: 'Update or add' request to existing
deposit. If False (default), this adds new archive request to
existing ones. Otherwise, this will replace existing archives.
ones.
Returns:
In the optimal case a dict with the following keys:
- deposit_id (int): Deposit identifier
- deposit_date (date): Deposit date
- archive: None (no archive is provided here)
Otherwise, a dictionary with the key error and the
associated failures, either:
- 400 (bad request) if the request is not providing an external
identifier
- 412 (precondition failed) if the potentially md5 hash provided
mismatch the reality of the archive
- 413 (request entity too large) if the length of the
archive exceeds the max size configured
- 415 (unsupported media type) if a wrong media type is provided
"""
external_id = headers["slug"]
content_types_present = set()
data: Dict[str, Optional[Any]] = {
"application/zip": None, # expected either zip
"application/x-tar": None, # or x-tar
"application/atom+xml": None,
}
for key, value in request.FILES.items():
fh = value
content_type = fh.content_type
if content_type in content_types_present:
return make_error_dict(
ERROR_CONTENT,
"Only 1 application/zip (or application/x-tar) archive "
"and 1 atom+xml entry is supported (as per sword2.0 "
"specification)",
"You provided more than 1 application/(zip|x-tar) "
"or more than 1 application/atom+xml content-disposition "
"header in the multipart deposit",
)
content_types_present.add(content_type)
assert content_type is not None
data[content_type] = fh
if len(content_types_present) != 2:
return make_error_dict(
ERROR_CONTENT,
"You must provide both 1 application/zip (or "
"application/x-tar) and 1 atom+xml entry for multipart "
"deposit",
"You need to provide only 1 application/(zip|x-tar) "
"and 1 application/atom+xml content-disposition header "
"in the multipart deposit",
)
filehandler = data["application/zip"]
if not filehandler:
filehandler = data["application/x-tar"]
precondition_status_response = self._check_preconditions_on(
filehandler, headers["content-md5sum"]
)
if precondition_status_response:
return precondition_status_response
try:
raw_metadata, metadata = self._read_metadata(data["application/atom+xml"])
except ParserError:
return make_error_dict(
PARSING_ERROR,
"Malformed xml metadata",
"The xml received is malformed. "
"Please ensure your metadata file is correctly formatted.",
)
# actual storage of data
deposit = self._deposit_put(
request,
deposit_id=deposit_id,
in_progress=headers["in-progress"],
external_id=external_id,
)
deposit_request_data = {
ARCHIVE_KEY: filehandler,
METADATA_KEY: metadata,
RAW_METADATA_KEY: raw_metadata,
}
self._deposit_request_put(
deposit, deposit_request_data, replace_metadata, replace_archives
)
assert filehandler is not None
return {
"deposit_id": deposit.id,
"deposit_date": deposit.reception_date,
"archive": filehandler.name,
"status": deposit.status,
}
def _atom_entry(
self,
request: Request,
headers: Dict[str, Any],
collection_name: str,
deposit_id: Optional[int] = None,
replace_metadata: bool = False,
replace_archives: bool = False,
) -> Dict[str, Any]:
"""Atom entry deposit.
Args:
- request (Request): the request holding information to parse
+ request: the request holding information to parse
and inject in db
headers: request headers formatted
collection_name: the associated client
deposit_id: deposit identifier if provided
replace_metadata: 'Update or add' request to existing
deposit. If False (default), this adds new metadata request to
existing ones. Otherwise, this will replace existing metadata.
replace_archives: 'Update or add' request to existing
deposit. If False (default), this adds new archive request to
existing ones. Otherwise, this will replace existing archives.
ones.
Returns:
In the optimal case a dict with the following keys:
- deposit_id: deposit id associated to the deposit
- deposit_date: date of the deposit
- archive: None (no archive is provided here)
Otherwise, a dictionary with the key error and the
associated failures, either:
- 400 (bad request) if the request is not providing an external
identifier
- 400 (bad request) if the request's body is empty
- 415 (unsupported media type) if a wrong media type is provided
"""
try:
raw_metadata, metadata = self._read_metadata(request.data)
except ParserError:
return make_error_dict(
BAD_REQUEST,
"Malformed xml metadata",
"The xml received is malformed. "
"Please ensure your metadata file is correctly formatted.",
)
if not metadata:
return make_error_dict(
BAD_REQUEST,
"Empty body request is not supported",
"Atom entry deposit is supposed to send for metadata. "
"If the body is empty, there is no metadata.",
)
external_id = metadata.get("external_identifier", headers["slug"])
# TODO: Determine if we are in the metadata-only deposit case. If it is, then
# save deposit and deposit request typed 'metadata' and send metadata to the
# metadata storage. Otherwise, do as existing deposit.
deposit = self._deposit_put(
request,
deposit_id=deposit_id,
in_progress=headers["in-progress"],
external_id=external_id,
)
self._deposit_request_put(
deposit,
{METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata},
replace_metadata,
replace_archives,
)
return {
"deposit_id": deposit.id,
"deposit_date": deposit.reception_date,
"archive": None,
"status": deposit.status,
}
def _empty_post(
self, request: Request, headers: Dict, collection_name: str, deposit_id: int
) -> Dict[str, Any]:
"""Empty post to finalize an empty deposit.
Args:
request: the request holding information to parse
and inject in db
headers: request headers formatted
collection_name: the associated client
deposit_id: deposit identifier
Returns:
Dictionary of result with the deposit's id, the date
it was completed and no archive.
"""
deposit = Deposit.objects.get(pk=deposit_id)
deposit.complete_date = timezone.now()
deposit.status = DEPOSIT_STATUS_DEPOSITED
deposit.save()
return {
"deposit_id": deposit_id,
"deposit_date": deposit.complete_date,
"status": deposit.status,
"archive": None,
}
def _make_iris(
self, request: Request, collection_name: str, deposit_id: int
) -> Dict[str, Any]:
"""Define the IRI endpoints
Args:
request (Request): The initial request
collection_name (str): client/collection's name
deposit_id (id): Deposit identifier
Returns:
Dictionary of keys with the iris' urls.
"""
args = [collection_name, deposit_id]
return {
iri: request.build_absolute_uri(reverse(iri, args=args))
for iri in [EM_IRI, EDIT_SE_IRI, CONT_FILE_IRI, STATE_IRI]
}
def additional_checks(
self,
request: Request,
headers: Dict[str, Any],
collection_name: str,
deposit_id: Optional[int] = None,
) -> Dict[str, Any]:
"""Permit the child class to enrich additional checks.
Returns:
dict with 'error' detailing the problem.
"""
return {}
def checks(
self, request: Request, collection_name: str, deposit_id: Optional[int] = None
) -> Dict[str, Any]:
try:
self._collection = DepositCollection.objects.get(name=collection_name)
except DepositCollection.DoesNotExist:
return make_error_dict(
NOT_FOUND, f"Unknown collection name {collection_name}"
)
assert self._collection is not None
username = request.user.username
if username: # unauthenticated request can have the username empty
try:
self._client: DepositClient = DepositClient.objects.get( # type: ignore
username=username
)
except DepositClient.DoesNotExist:
return make_error_dict(NOT_FOUND, f"Unknown client name {username}")
collection_id = self._collection.id
collections = self._client.collections
assert collections is not None
if collection_id not in collections:
return make_error_dict(
FORBIDDEN,
f"Client {username} cannot access collection {collection_name}",
)
+ headers = self._read_headers(request)
+
if deposit_id:
try:
deposit = Deposit.objects.get(pk=deposit_id)
except Deposit.DoesNotExist:
return make_error_dict(
NOT_FOUND, f"Deposit with id {deposit_id} does not exist"
)
- checks = self.restrict_access(request, deposit)
+ assert deposit is not None
+ checks = self.restrict_access(request, headers, deposit)
if checks:
return checks
- headers = self._read_headers(request)
if headers["on-behalf-of"]:
return make_error_dict(MEDIATION_NOT_ALLOWED, "Mediation is not supported.")
checks = self.additional_checks(request, headers, collection_name, deposit_id)
if "error" in checks:
return checks
return {"headers": headers}
def restrict_access(
- self, request: Request, deposit: Optional[Deposit] = None
+ self, request: Request, headers: Dict, deposit: Deposit
) -> Dict[str, Any]:
- if deposit:
- if request.method != "GET" and deposit.status != DEPOSIT_STATUS_PARTIAL:
- summary = "You can only act on deposit with status '%s'" % (
- DEPOSIT_STATUS_PARTIAL,
- )
- description = f"This deposit has status '{deposit.status}'"
- return make_error_dict(
- BAD_REQUEST, summary=summary, verbose_description=description
- )
+ """Allow modifications on deposit with status 'partial' only, reject the rest.
+
+ """
+ if request.method != "GET" and deposit.status != DEPOSIT_STATUS_PARTIAL:
+ summary = "You can only act on deposit with status '%s'" % (
+ DEPOSIT_STATUS_PARTIAL,
+ )
+ description = f"This deposit has status '{deposit.status}'"
+ return make_error_dict(
+ BAD_REQUEST, summary=summary, verbose_description=description
+ )
return {}
def _basic_not_allowed_method(self, request: Request, method: str):
return make_error_response(
request,
METHOD_NOT_ALLOWED,
f"{method} method is not supported on this endpoint",
)
def get(
self, request: Request, collection_name: str, deposit_id: int
) -> Union[HttpResponse, FileResponse]:
return self._basic_not_allowed_method(request, "GET")
def post(
self, request: Request, collection_name: str, deposit_id: Optional[int] = None
) -> HttpResponse:
return self._basic_not_allowed_method(request, "POST")
def put(
self, request: Request, collection_name: str, deposit_id: int
) -> HttpResponse:
return self._basic_not_allowed_method(request, "PUT")
def delete(
self, request: Request, collection_name: str, deposit_id: Optional[int] = None
) -> HttpResponse:
return self._basic_not_allowed_method(request, "DELETE")
class APIGet(APIBase, metaclass=ABCMeta):
"""Mixin for class to support GET method.
"""
def get(
self, request: Request, collection_name: str, deposit_id: int
) -> Union[HttpResponse, FileResponse]:
"""Endpoint to create/add resources to deposit.
Returns:
200 response when no error during routine occurred
400 if the deposit does not belong to the collection
404 if the deposit or the collection does not exist
"""
checks = self.checks(request, collection_name, deposit_id)
if "error" in checks:
return make_error_response_from_dict(request, checks["error"])
r = self.process_get(request, collection_name, deposit_id)
status, content, content_type = r
if content_type == "swh/generator":
with content as path:
return FileResponse(
open(path, "rb"), status=status, content_type="application/zip"
)
if content_type == "application/json":
return HttpResponse(
json.dumps(content), status=status, content_type=content_type
)
return HttpResponse(content, status=status, content_type=content_type)
@abstractmethod
def process_get(
self, request: Request, collection_name: str, deposit_id: int
) -> Tuple[int, Any, str]:
"""Routine to deal with the deposit's get processing.
Returns:
Tuple status, stream of content, content-type
"""
pass
class APIPost(APIBase, metaclass=ABCMeta):
"""Mixin for class to support DELETE method.
"""
def post(
self, request: Request, collection_name: str, deposit_id: Optional[int] = None
) -> HttpResponse:
"""Endpoint to create/add resources to deposit.
Returns:
204 response when no error during routine occurred.
400 if the deposit does not belong to the collection
404 if the deposit or the collection does not exist
"""
checks = self.checks(request, collection_name, deposit_id)
if "error" in checks:
return make_error_response_from_dict(request, checks["error"])
headers = checks["headers"]
_status, _iri_key, data = self.process_post(
request, headers, collection_name, deposit_id
)
error = data.get("error")
if error:
return make_error_response_from_dict(request, error)
data["packagings"] = ACCEPT_PACKAGINGS
iris = self._make_iris(request, collection_name, data["deposit_id"])
data.update(iris)
response = render(
request,
"deposit/deposit_receipt.xml",
context=data,
content_type="application/xml",
status=_status,
)
response._headers["location"] = "Location", data[_iri_key] # type: ignore
return response
@abstractmethod
def process_post(
self,
request,
headers: Dict,
collection_name: str,
deposit_id: Optional[int] = None,
) -> Tuple[int, str, Dict]:
"""Routine to deal with the deposit's processing.
Returns
Tuple of:
- response status code (200, 201, etc...)
- key iri (EM_IRI, EDIT_SE_IRI, etc...)
- dictionary of the processing result
"""
pass
class APIPut(APIBase, metaclass=ABCMeta):
"""Mixin for class to support PUT method.
"""
def put(
self, request: Request, collection_name: str, deposit_id: int
) -> HttpResponse:
"""Endpoint to update deposit resources.
Returns:
204 response when no error during routine occurred.
400 if the deposit does not belong to the collection
404 if the deposit or the collection does not exist
"""
checks = self.checks(request, collection_name, deposit_id)
if "error" in checks:
return make_error_response_from_dict(request, checks["error"])
headers = checks["headers"]
data = self.process_put(request, headers, collection_name, deposit_id)
error = data.get("error")
if error:
return make_error_response_from_dict(request, error)
return HttpResponse(status=status.HTTP_204_NO_CONTENT)
@abstractmethod
def process_put(
self, request: Request, headers: Dict, collection_name: str, deposit_id: int
) -> Dict[str, Any]:
"""Routine to deal with updating a deposit in some way.
Returns
dictionary of the processing result
"""
pass
class APIDelete(APIBase, metaclass=ABCMeta):
"""Mixin for class to support DELETE method.
"""
def delete(
self, request: Request, collection_name: str, deposit_id: Optional[int] = None
) -> HttpResponse:
"""Endpoint to delete some deposit's resources (archives, deposit).
Returns:
204 response when no error during routine occurred.
400 if the deposit does not belong to the collection
404 if the deposit or the collection does not exist
"""
checks = self.checks(request, collection_name, deposit_id)
if "error" in checks:
return make_error_response_from_dict(request, checks["error"])
assert deposit_id is not None
data = self.process_delete(request, collection_name, deposit_id)
error = data.get("error")
if error:
return make_error_response_from_dict(request, error)
return HttpResponse(status=status.HTTP_204_NO_CONTENT)
@abstractmethod
def process_delete(
self, request: Request, collection_name: str, deposit_id: int
) -> Dict:
"""Routine to delete a resource.
This is mostly not allowed except for the
EM_IRI (cf. .api.deposit_update.APIUpdateArchive)
"""
return {}
diff --git a/swh/deposit/api/deposit_update.py b/swh/deposit/api/deposit_update.py
index 07d00e92..c2e54dc7 100644
--- a/swh/deposit/api/deposit_update.py
+++ b/swh/deposit/api/deposit_update.py
@@ -1,183 +1,345 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Any, Dict, Optional, Tuple
from rest_framework import status
-
-from ..config import CONT_FILE_IRI, EDIT_SE_IRI, EM_IRI
-from ..errors import BAD_REQUEST, make_error_dict
+from rest_framework.request import Request
+
+from swh.deposit.models import Deposit
+from swh.model.hashutil import hash_to_bytes
+from swh.model.identifiers import parse_swhid
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
+)
+from swh.storage import get_storage
+from swh.storage.interface import StorageInterface
+
+from ..config import (
+ CONT_FILE_IRI,
+ DEPOSIT_STATUS_LOAD_SUCCESS,
+ EDIT_SE_IRI,
+ EM_IRI,
+ METADATA_KEY,
+ RAW_METADATA_KEY,
+)
+from ..errors import BAD_REQUEST, ParserError, make_error_dict
from ..parsers import (
SWHAtomEntryParser,
SWHFileUploadTarParser,
SWHFileUploadZipParser,
SWHMultiPartParser,
)
from .common import ACCEPT_ARCHIVE_CONTENT_TYPES, APIDelete, APIPost, APIPut
class APIUpdateArchive(APIPost, APIPut, APIDelete):
"""Deposit request class defining api endpoints for sword deposit.
What's known as 'EM IRI' in the sword specification.
HTTP verbs supported: PUT, POST, DELETE
"""
parser_classes = (
SWHFileUploadZipParser,
SWHFileUploadTarParser,
)
def process_put(
self, req, headers, collection_name: str, deposit_id: int
) -> Dict[str, Any]:
"""Replace existing content for the existing deposit.
source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_binary # noqa
Returns:
204 No content
"""
if req.content_type not in ACCEPT_ARCHIVE_CONTENT_TYPES:
msg = "Packaging format supported is restricted to %s" % (
", ".join(ACCEPT_ARCHIVE_CONTENT_TYPES)
)
return make_error_dict(BAD_REQUEST, msg)
return self._binary_upload(
req, headers, collection_name, deposit_id=deposit_id, replace_archives=True
)
def process_post(
self, req, headers: Dict, collection_name: str, deposit_id: Optional[int] = None
) -> Tuple[int, str, Dict]:
"""Add new content to the existing deposit.
source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_mediaresource # noqa
Returns:
201 Created
Headers: Location: [Cont-File-IRI]
Body: [optional Deposit Receipt]
"""
if req.content_type not in ACCEPT_ARCHIVE_CONTENT_TYPES:
msg = "Packaging format supported is restricted to %s" % (
", ".join(ACCEPT_ARCHIVE_CONTENT_TYPES)
)
unused = 0
return unused, "unused", make_error_dict(BAD_REQUEST, msg)
return (
status.HTTP_201_CREATED,
CONT_FILE_IRI,
self._binary_upload(req, headers, collection_name, deposit_id),
)
def process_delete(self, req, collection_name: str, deposit_id: int) -> Dict:
"""Delete content (archives) from existing deposit.
source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_deletingcontent # noqa
Returns:
204 Created
"""
return self._delete_archives(collection_name, deposit_id)
class APIUpdateMetadata(APIPost, APIPut, APIDelete):
"""Deposit request class defining api endpoints for sword deposit.
What's known as 'Edit IRI' (and SE IRI) in the sword specification.
HTTP verbs supported: POST (SE IRI), PUT (Edit IRI), DELETE
"""
parser_classes = (SWHMultiPartParser, SWHAtomEntryParser)
+ def __init__(self):
+ super().__init__()
+ self.storage_metadata: StorageInterface = get_storage(
+ **self.config["storage_metadata"]
+ )
+
+ def restrict_access(
+ self, request: Request, headers: Dict, deposit: Deposit
+ ) -> Dict[str, Any]:
+ """Relax restriction access to allow metadata update on deposit with status "done" when
+ a swhid is provided.
+
+ """
+ if (
+ request.method == "PUT"
+ and headers["swhid"] is not None
+ and deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
+ ):
+ # Allow metadata update on deposit with status "done" when swhid provided
+ return {}
+ # otherwise, let the standard access restriction check occur
+ return super().restrict_access(request, headers, deposit)
+
def process_put(
- self, req, headers: Dict, collection_name: str, deposit_id: int
+ self, request, headers: Dict, collection_name: str, deposit_id: int
) -> Dict[str, Any]:
- """Replace existing deposit's metadata/archive with new ones.
+ """This allows the following scenarios:
+
+ - multipart: replace all the deposit (status partial) metadata and archive
+ with the provided ones.
+ - atom: replace all the deposit (status partial) metadata with the
+ provided ones.
+        - with swhid, atom: Add new metadata to deposit (status done) with provided ones
+ and push such metadata to the metadata storage directly.
source:
- http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_metadata
- http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_multipart
+ Raises:
+ 400 if any of the following occur:
+ - the swhid provided and the deposit swhid do not match
+ - the provided metadata xml file is malformed
+ - the provided xml atom entry is empty
+ - the provided swhid does not exist in the archive
+
Returns:
204 No content
""" # noqa
- if req.content_type.startswith("multipart/"):
- return self._multipart_upload(
- req,
+ swhid = headers.get("swhid")
+ if swhid is None:
+ if request.content_type.startswith("multipart/"):
+ return self._multipart_upload(
+ request,
+ headers,
+ collection_name,
+ deposit_id=deposit_id,
+ replace_archives=True,
+ replace_metadata=True,
+ )
+ # standard metadata update (replace all metadata already provided to the
+ # deposit by the new ones)
+ return self._atom_entry(
+ request,
headers,
collection_name,
deposit_id=deposit_id,
- replace_archives=True,
replace_metadata=True,
)
- return self._atom_entry(
- req, headers, collection_name, deposit_id=deposit_id, replace_metadata=True
+
+ # Update metadata on a deposit already ingested
+ # Write to the metadata storage (and the deposit backend)
+ # no ingestion triggered
+
+ deposit = Deposit.objects.get(pk=deposit_id)
+ assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
+
+ if swhid != deposit.swh_id:
+ return make_error_dict(
+ BAD_REQUEST,
+ f"Mismatched provided SWHID {swhid} with deposit's {deposit.swh_id}.",
+ "The provided SWHID does not match the deposit to update. "
+ "Please ensure you send the correct deposit SWHID.",
+ )
+
+ try:
+ raw_metadata, metadata = self._read_metadata(request.data)
+ except ParserError:
+ return make_error_dict(
+ BAD_REQUEST,
+ "Malformed xml metadata",
+ "The xml received is malformed. "
+ "Please ensure your metadata file is correctly formatted.",
+ )
+
+ if not metadata:
+ return make_error_dict(
+ BAD_REQUEST,
+ "Empty body request is not supported",
+ "Atom entry deposit is supposed to send for metadata. "
+ "If the body is empty, there is no metadata.",
+ )
+
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT_CLIENT,
+ url=deposit.client.provider_url,
+ metadata={"name": deposit.client.last_name},
)
+ metadata_fetcher = MetadataFetcher(
+ name=self.tool["name"],
+ version=self.tool["version"],
+ metadata=self.tool["configuration"],
+ )
+
+ deposit_swhid = parse_swhid(swhid)
+
+ directory_id = hash_to_bytes(deposit_swhid.object_id)
+
+ # check the swhid exists in the archive
+ directories_missing = list(
+ self.storage_metadata.directory_missing([directory_id])
+ )
+ if len(directories_missing) > 0:
+ return make_error_dict(
+ BAD_REQUEST,
+ f"Unknown directory SWHID {swhid} reference",
+ "The SWHID provided is not a known directory SWHID in SWH archive. "
+ "Please provide an existing SWHID.",
+ )
+
+ # replace metadata within the deposit backend
+ deposit_request_data = {
+ METADATA_KEY: metadata,
+ RAW_METADATA_KEY: raw_metadata,
+ }
+
+ # actually add the metadata to the completed deposit
+ deposit_request = self._deposit_request_put(deposit, deposit_request_data)
+ # store that metadata to the metadata storage
+ metadata_object = RawExtrinsicMetadata(
+ type=MetadataTargetType.DIRECTORY,
+ id=deposit_swhid,
+ discovery_date=deposit_request.date,
+ authority=metadata_authority,
+ fetcher=metadata_fetcher,
+ format="sword-v2-atom-codemeta",
+ metadata=raw_metadata,
+ )
+
+ # write to metadata storage
+ self.storage_metadata.metadata_authority_add([metadata_authority])
+ self.storage_metadata.metadata_fetcher_add([metadata_fetcher])
+ self.storage_metadata.raw_extrinsic_metadata_add([metadata_object])
+
+ return {
+ "deposit_id": deposit_id,
+ "deposit_date": deposit_request.date,
+ "status": deposit.status,
+ "archive": None,
+ }
+
def process_post(
self,
request,
headers: Dict,
collection_name: str,
deposit_id: Optional[int] = None,
) -> Tuple[int, str, Dict]:
"""Add new metadata/archive to existing deposit.
+ This allows the following scenarios to occur:
+
+ - multipart: Add new metadata and archive to a deposit in status partial with
+ the provided ones.
+
+ - empty atom: Allows to finalize a deposit in status partial (transition to
+ deposited).
+
source:
- http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_metadata
- http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_multipart
-
- This also deals with an empty post corner case to finalize a
- deposit.
+ - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#continueddeposit_complete
Returns:
In optimal case for a multipart and atom-entry update, a
201 Created response. The body response will hold a
deposit. And the response headers will contain an entry
'Location' with the EM-IRI.
For the empty post case, this returns a 200.
""" # noqa
assert deposit_id is not None
if request.content_type.startswith("multipart/"):
data = self._multipart_upload(
request, headers, collection_name, deposit_id=deposit_id
)
return (status.HTTP_201_CREATED, EM_IRI, data)
content_length = headers["content-length"] or 0
if content_length == 0 and headers["in-progress"] is False:
# check for final empty post
- # source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html
- # #continueddeposit_complete
data = self._empty_post(request, headers, collection_name, deposit_id)
return (status.HTTP_200_OK, EDIT_SE_IRI, data)
data = self._atom_entry(
request, headers, collection_name, deposit_id=deposit_id
)
return (status.HTTP_201_CREATED, EM_IRI, data)
def process_delete(self, req, collection_name: str, deposit_id: int) -> Dict:
"""Delete the container (deposit).
source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_deleteconteiner # noqa
"""
return self._delete_deposit(collection_name, deposit_id)
diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
index 51b6636e..36124045 100644
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -1,195 +1,188 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from contextlib import contextmanager
import os
import shutil
import tempfile
from typing import Any, Dict, Tuple
from rest_framework import status
from swh.core import tarball
-from swh.deposit.api import __version__
from swh.deposit.utils import normalize_date
from swh.model import identifiers
+from swh.model.model import MetadataAuthorityType
from . import APIPrivateView, DepositReadMixin
from ...config import ARCHIVE_TYPE, SWH_PERSON
from ...models import Deposit
from ..common import APIGet
@contextmanager
def aggregate_tarballs(extraction_dir, archive_paths):
"""Aggregate multiple tarballs into one and returns this new archive's
path.
Args:
extraction_dir (path): Path to use for the tarballs computation
archive_paths ([str]): Deposit's archive paths
Returns:
Tuple (directory to clean up, archive path (aggregated or not))
"""
# rebuild one zip archive from (possibly) multiple ones
os.makedirs(extraction_dir, 0o755, exist_ok=True)
dir_path = tempfile.mkdtemp(prefix="swh.deposit-", dir=extraction_dir)
# root folder to build an aggregated tarball
aggregated_tarball_rootdir = os.path.join(dir_path, "aggregate")
os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True)
# uncompress in a temporary location all archives
for archive_path in archive_paths:
tarball.uncompress(archive_path, aggregated_tarball_rootdir)
# Aggregate into one big tarball the multiple smaller ones
temp_tarpath = shutil.make_archive(
aggregated_tarball_rootdir, "zip", aggregated_tarball_rootdir
)
# can already clean up temporary directory
shutil.rmtree(aggregated_tarball_rootdir)
try:
yield temp_tarpath
finally:
shutil.rmtree(dir_path)
class APIReadArchives(APIPrivateView, APIGet, DepositReadMixin):
"""Dedicated class to read a deposit's raw archives content.
Only GET is supported.
"""
def __init__(self):
super().__init__()
self.extraction_dir = self.config["extraction_dir"]
if not os.path.exists(self.extraction_dir):
os.makedirs(self.extraction_dir)
def process_get(
self, request, collection_name: str, deposit_id: int
) -> Tuple[int, Any, str]:
"""Build a unique tarball from the multiple received and stream that
content to the client.
Args:
request (Request):
collection_name: Collection owning the deposit
deposit_id: Deposit concerned by the reading
Returns:
Tuple status, stream of content, content-type
"""
archive_paths = [
r.archive.path
for r in self._deposit_requests(deposit_id, request_type=ARCHIVE_TYPE)
]
return (
status.HTTP_200_OK,
aggregate_tarballs(self.extraction_dir, archive_paths),
"swh/generator",
)
class APIReadMetadata(APIPrivateView, APIGet, DepositReadMixin):
"""Class in charge of aggregating metadata on a deposit.
"""
- def __init__(self):
- super().__init__()
- self.provider = self.config["provider"]
- self.tool = {
- "name": "swh-deposit",
- "version": __version__,
- "configuration": {"sword_version": "2"},
- }
-
def _normalize_dates(self, deposit, metadata):
"""Normalize the date to use as a tuple of author date, committer date
from the incoming metadata.
Args:
deposit (Deposit): Deposit model representation
metadata (Dict): Metadata dict representation
Returns:
Tuple of author date, committer date. Those dates are
swh normalized.
"""
commit_date = metadata.get("codemeta:datePublished")
author_date = metadata.get("codemeta:dateCreated")
if author_date and commit_date:
pass
elif commit_date:
author_date = commit_date
elif author_date:
commit_date = author_date
else:
author_date = deposit.complete_date
commit_date = deposit.complete_date
return (normalize_date(author_date), normalize_date(commit_date))
def metadata_read(self, deposit):
"""Read and aggregate multiple data on deposit into one unified data
dictionary.
Args:
deposit (Deposit): Deposit concerned by the data aggregation.
Returns:
Dictionary of data representing the deposit to inject in swh.
"""
metadata = self._metadata_get(deposit)
# Read information metadata
data = {"origin": {"type": "deposit", "url": deposit.origin_url,}}
- # metadata provider
- self.provider["provider_name"] = deposit.client.last_name
- self.provider["provider_url"] = deposit.client.provider_url
-
author_date, commit_date = self._normalize_dates(deposit, metadata)
if deposit.parent:
swh_persistent_id = deposit.parent.swh_id
swhid = identifiers.parse_swhid(swh_persistent_id)
parent_revision = swhid.object_id
parents = [parent_revision]
else:
parents = []
data["origin_metadata"] = {
- "provider": self.provider,
+ # metadata provider
+ "provider": {
+ "provider_name": deposit.client.last_name,
+ "provider_url": deposit.client.provider_url,
+ "provider_type": MetadataAuthorityType.DEPOSIT_CLIENT.value,
+ "metadata": {},
+ },
"tool": self.tool,
"metadata": metadata,
}
data["deposit"] = {
"id": deposit.id,
"client": deposit.client.username,
"collection": deposit.collection.name,
"author": SWH_PERSON,
"author_date": author_date,
"committer": SWH_PERSON,
"committer_date": commit_date,
"revision_parents": parents,
}
return data
def process_get(
self, request, collection_name: str, deposit_id: int
) -> Tuple[int, Dict, str]:
deposit = Deposit.objects.get(pk=deposit_id)
data = self.metadata_read(deposit)
return status.HTTP_200_OK, data if data else {}, "application/json"
diff --git a/swh/deposit/config.py b/swh/deposit/config.py
index 16221dfd..e67dd030 100644
--- a/swh/deposit/config.py
+++ b/swh/deposit/config.py
@@ -1,99 +1,105 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
from typing import Any, Dict
from swh.core import config
+from swh.deposit import __version__
from swh.scheduler import get_scheduler
from swh.scheduler.interface import SchedulerInterface
# IRIs (Internationalized Resource identifier) sword 2.0 specified
EDIT_SE_IRI = "edit_se_iri"
EM_IRI = "em_iri"
CONT_FILE_IRI = "cont_file_iri"
SD_IRI = "servicedocument"
COL_IRI = "upload"
STATE_IRI = "state_iri"
PRIVATE_GET_RAW_CONTENT = "private-download"
PRIVATE_CHECK_DEPOSIT = "check-deposit"
PRIVATE_PUT_DEPOSIT = "private-update"
PRIVATE_GET_DEPOSIT_METADATA = "private-read"
PRIVATE_LIST_DEPOSITS = "private-deposit-list"
ARCHIVE_KEY = "archive"
METADATA_KEY = "metadata"
RAW_METADATA_KEY = "raw-metadata"
ARCHIVE_TYPE = "archive"
METADATA_TYPE = "metadata"
AUTHORIZED_PLATFORMS = ["development", "production", "testing"]
DEPOSIT_STATUS_REJECTED = "rejected"
DEPOSIT_STATUS_PARTIAL = "partial"
DEPOSIT_STATUS_DEPOSITED = "deposited"
DEPOSIT_STATUS_VERIFIED = "verified"
DEPOSIT_STATUS_LOAD_SUCCESS = "done"
DEPOSIT_STATUS_LOAD_FAILURE = "failed"
# Revision author for deposit
SWH_PERSON = {
"name": "Software Heritage",
"fullname": "Software Heritage",
"email": "robot@softwareheritage.org",
}
DEFAULT_CONFIG = {
"max_upload_size": 209715200,
"checks": True,
}
def setup_django_for(platform=None, config_file=None):
"""Setup function for command line tools (swh.deposit.create_user) to
initialize the needed db access.
Note:
Do not import any django related module prior to this function
call. Otherwise, this will raise an
django.core.exceptions.ImproperlyConfigured error message.
Args:
platform (str): the platform the scheduling is running
config_file (str): Extra configuration file (typically for the
production platform)
Raises:
ValueError in case of wrong platform inputs.
"""
if platform is not None:
if platform not in AUTHORIZED_PLATFORMS:
raise ValueError("Platform should be one of %s" % AUTHORIZED_PLATFORMS)
if "DJANGO_SETTINGS_MODULE" not in os.environ:
os.environ["DJANGO_SETTINGS_MODULE"] = "swh.deposit.settings.%s" % platform
if config_file:
os.environ.setdefault("SWH_CONFIG_FILENAME", config_file)
import django
django.setup()
class APIConfig:
"""API Configuration centralized class. This loads explicitly the configuration file out
of the SWH_CONFIG_FILENAME environment variable.
"""
def __init__(self):
config_file = os.environ["SWH_CONFIG_FILENAME"]
conf = config.read_raw_config(config.config_basepath(config_file))
self.config: Dict[str, Any] = config.merge_configs(DEFAULT_CONFIG, conf)
self.scheduler: SchedulerInterface = get_scheduler(**self.config["scheduler"])
+ self.tool = {
+ "name": "swh-deposit",
+ "version": __version__,
+ "configuration": {"sword_version": "2"},
+ }
diff --git a/swh/deposit/tests/api/test_deposit_atom.py b/swh/deposit/tests/api/test_deposit_atom.py
index 2214b551..f551a317 100644
--- a/swh/deposit/tests/api/test_deposit_atom.py
+++ b/swh/deposit/tests/api/test_deposit_atom.py
@@ -1,328 +1,326 @@
# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from io import BytesIO
from django.urls import reverse
import pytest
from rest_framework import status
from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED
from swh.deposit.models import Deposit, DepositCollection, DepositRequest
from swh.deposit.parsers import parse_xml
def test_post_deposit_atom_201_even_with_decimal(
authenticated_client, deposit_collection, atom_dataset
):
"""Posting an initial atom entry should return 201 with deposit receipt
"""
atom_error_with_decimal = atom_dataset["error-with-decimal"]
response = authenticated_client.post(
reverse(COL_IRI, args=[deposit_collection.name]),
content_type="application/atom+xml;type=entry",
data=atom_error_with_decimal,
HTTP_SLUG="external-id",
HTTP_IN_PROGRESS="false",
)
# then
assert response.status_code == status.HTTP_201_CREATED
response_content = parse_xml(BytesIO(response.content))
deposit_id = response_content["deposit_id"]
deposit = Deposit.objects.get(pk=deposit_id)
dr = DepositRequest.objects.get(deposit=deposit)
assert dr.metadata is not None
sw_version = dr.metadata.get("codemeta:softwareVersion")
assert sw_version == "10.4"
def test_post_deposit_atom_400_with_empty_body(
authenticated_client, deposit_collection, atom_dataset
):
"""Posting empty body request should return a 400 response
"""
response = authenticated_client.post(
reverse(COL_IRI, args=[deposit_collection.name]),
content_type="application/atom+xml;type=entry",
data=atom_dataset["entry-data-empty-body"],
- HTTP_SLUG="something",
)
assert response.status_code == status.HTTP_400_BAD_REQUEST
- assert b"Empty body request is not supported" in response.content
def test_post_deposit_atom_400_badly_formatted_atom(
authenticated_client, deposit_collection, atom_dataset
):
"""Posting a badly formatted atom should return a 400 response
"""
response = authenticated_client.post(
reverse(COL_IRI, args=[deposit_collection.name]),
content_type="application/atom+xml;type=entry",
data=atom_dataset["entry-data-badly-formatted"],
)
assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_post_deposit_atom_parsing_error(
authenticated_client, deposit_collection, atom_dataset
):
"""Posting parsing error prone atom should return 400
"""
response = authenticated_client.post(
reverse(COL_IRI, args=[deposit_collection.name]),
content_type="application/atom+xml;type=entry",
data=atom_dataset["entry-data-parsing-error-prone"],
)
assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_post_deposit_atom_no_slug_header(
authenticated_client, deposit_collection, atom_dataset
):
"""Posting an atom entry without a slug header should return a 400
"""
url = reverse(COL_IRI, args=[deposit_collection.name])
# when
response = authenticated_client.post(
url,
content_type="application/atom+xml;type=entry",
data=atom_dataset["entry-data0"],
# + headers
HTTP_IN_PROGRESS="false",
)
assert b"Missing SLUG header" in response.content
assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_post_deposit_atom_unknown_collection(authenticated_client, atom_dataset):
"""Posting an atom entry to an unknown collection should return a 404
"""
unknown_collection = "unknown-one"
with pytest.raises(DepositCollection.DoesNotExist):
DepositCollection.objects.get(name=unknown_collection)
response = authenticated_client.post(
reverse(COL_IRI, args=[unknown_collection]), # <- unknown collection
content_type="application/atom+xml;type=entry",
data=atom_dataset["entry-data0"],
HTTP_SLUG="something",
)
assert response.status_code == status.HTTP_404_NOT_FOUND
def test_post_deposit_atom_entry_initial(
authenticated_client, deposit_collection, atom_dataset
):
"""Posting an initial atom entry should return 201 with deposit receipt
"""
# given
external_id = "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a"
with pytest.raises(Deposit.DoesNotExist):
Deposit.objects.get(external_id=external_id)
atom_entry_data = atom_dataset["entry-data0"] % external_id
# when
response = authenticated_client.post(
reverse(COL_IRI, args=[deposit_collection.name]),
content_type="application/atom+xml;type=entry",
data=atom_entry_data,
HTTP_SLUG=external_id,
HTTP_IN_PROGRESS="false",
)
# then
assert response.status_code == status.HTTP_201_CREATED
response_content = parse_xml(BytesIO(response.content))
deposit_id = response_content["deposit_id"]
deposit = Deposit.objects.get(pk=deposit_id)
assert deposit.collection == deposit_collection
assert deposit.external_id == external_id
assert deposit.status == DEPOSIT_STATUS_DEPOSITED
# one associated request to a deposit
deposit_request = DepositRequest.objects.get(deposit=deposit)
assert deposit_request.metadata is not None
assert deposit_request.raw_metadata == atom_entry_data
assert bool(deposit_request.archive) is False
def test_post_deposit_atom_entry_with_codemeta(
authenticated_client, deposit_collection, atom_dataset
):
"""Posting an initial atom entry should return 201 with deposit receipt
"""
# given
external_id = "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a"
with pytest.raises(Deposit.DoesNotExist):
Deposit.objects.get(external_id=external_id)
atom_entry_data = atom_dataset["codemeta-sample"] % external_id
# when
response = authenticated_client.post(
reverse(COL_IRI, args=[deposit_collection.name]),
content_type="application/atom+xml;type=entry",
data=atom_entry_data,
HTTP_SLUG=external_id,
HTTP_IN_PROGRESS="false",
)
# then
assert response.status_code == status.HTTP_201_CREATED
response_content = parse_xml(BytesIO(response.content))
deposit_id = response_content["deposit_id"]
deposit = Deposit.objects.get(pk=deposit_id)
assert deposit.collection == deposit_collection
assert deposit.external_id == external_id
assert deposit.status == DEPOSIT_STATUS_DEPOSITED
# one associated request to a deposit
deposit_request = DepositRequest.objects.get(deposit=deposit)
assert deposit_request.metadata is not None
assert deposit_request.raw_metadata == atom_entry_data
assert bool(deposit_request.archive) is False
def test_post_deposit_atom_entry_tei(
authenticated_client, deposit_collection, atom_dataset
):
"""Posting initial atom entry as TEI should return 201 with receipt
"""
# given
external_id = "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a"
with pytest.raises(Deposit.DoesNotExist):
Deposit.objects.get(external_id=external_id)
atom_entry_data = atom_dataset["tei-sample"]
# when
response = authenticated_client.post(
reverse(COL_IRI, args=[deposit_collection.name]),
content_type="application/atom+xml;type=entry",
data=atom_entry_data,
HTTP_SLUG=external_id,
HTTP_IN_PROGRESS="false",
)
# then
assert response.status_code == status.HTTP_201_CREATED
response_content = parse_xml(BytesIO(response.content))
deposit_id = response_content["deposit_id"]
deposit = Deposit.objects.get(pk=deposit_id)
assert deposit.collection == deposit_collection
assert deposit.external_id == external_id
assert deposit.status == DEPOSIT_STATUS_DEPOSITED
# one associated request to a deposit
deposit_request = DepositRequest.objects.get(deposit=deposit)
assert deposit_request.metadata is not None
assert deposit_request.raw_metadata == atom_entry_data
assert bool(deposit_request.archive) is False
def test_post_deposit_atom_entry_multiple_steps(
authenticated_client, deposit_collection, atom_dataset
):
"""After initial deposit, updating a deposit should return a 201
"""
# given
external_id = "urn:uuid:2225c695-cfb8-4ebb-aaaa-80da344efa6a"
with pytest.raises(Deposit.DoesNotExist):
deposit = Deposit.objects.get(external_id=external_id)
# when
response = authenticated_client.post(
reverse(COL_IRI, args=[deposit_collection.name]),
content_type="application/atom+xml;type=entry",
data=atom_dataset["entry-data1"],
HTTP_IN_PROGRESS="True",
HTTP_SLUG=external_id,
)
# then
assert response.status_code == status.HTTP_201_CREATED
response_content = parse_xml(BytesIO(response.content))
deposit_id = int(response_content["deposit_id"])
deposit = Deposit.objects.get(pk=deposit_id)
assert deposit.collection == deposit_collection
assert deposit.external_id == external_id
assert deposit.status == "partial"
# one associated request to a deposit
deposit_requests = DepositRequest.objects.filter(deposit=deposit)
assert len(deposit_requests) == 1
atom_entry_data = atom_dataset["entry-data-minimal"] % external_id.encode(
"utf-8"
) # noqa
update_uri = response._headers["location"][1]
# when updating the first deposit post
response = authenticated_client.post(
update_uri,
content_type="application/atom+xml;type=entry",
data=atom_entry_data,
HTTP_IN_PROGRESS="False",
)
# then
assert response.status_code == status.HTTP_201_CREATED
response_content = parse_xml(BytesIO(response.content))
deposit_id = int(response_content["deposit_id"])
deposit = Deposit.objects.get(pk=deposit_id)
assert deposit.collection == deposit_collection
assert deposit.external_id == external_id
assert deposit.status == DEPOSIT_STATUS_DEPOSITED
assert len(Deposit.objects.all()) == 1
# now 2 associated requests to a same deposit
deposit_requests = DepositRequest.objects.filter(deposit=deposit).order_by("id")
assert len(deposit_requests) == 2
atom_entry_data1 = atom_dataset["entry-data1"]
expected_meta = [
{"metadata": parse_xml(atom_entry_data1), "raw_metadata": atom_entry_data1},
{"metadata": parse_xml(atom_entry_data), "raw_metadata": atom_entry_data},
]
for i, deposit_request in enumerate(deposit_requests):
actual_metadata = deposit_request.metadata
assert actual_metadata == expected_meta[i]["metadata"]
assert deposit_request.raw_metadata == expected_meta[i]["raw_metadata"]
assert bool(deposit_request.archive) is False
diff --git a/swh/deposit/tests/api/test_deposit_private_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py
index ec62dc73..76d1efde 100644
--- a/swh/deposit/tests/api/test_deposit_private_read_metadata.py
+++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py
@@ -1,551 +1,551 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.urls import reverse
from rest_framework import status
-from swh.deposit.api import __version__
+from swh.deposit import __version__
from swh.deposit.config import EDIT_SE_IRI, PRIVATE_GET_DEPOSIT_METADATA, SWH_PERSON
from swh.deposit.models import Deposit
PRIVATE_GET_DEPOSIT_METADATA_NC = PRIVATE_GET_DEPOSIT_METADATA + "-nc"
def private_get_raw_url_endpoints(collection, deposit):
"""There are 2 endpoints to check (one with collection, one without)"""
deposit_id = deposit if isinstance(deposit, int) else deposit.id
return [
reverse(PRIVATE_GET_DEPOSIT_METADATA, args=[collection.name, deposit_id]),
reverse(PRIVATE_GET_DEPOSIT_METADATA_NC, args=[deposit_id]),
]
def update_deposit(authenticated_client, collection, deposit, atom_dataset):
for atom_data in ["entry-data2", "entry-data3"]:
update_deposit_with_metadata(
authenticated_client, collection, deposit, atom_dataset[atom_data]
)
return deposit
def update_deposit_with_metadata(authenticated_client, collection, deposit, metadata):
# update deposit's metadata
response = authenticated_client.post(
reverse(EDIT_SE_IRI, args=[collection.name, deposit.id]),
content_type="application/atom+xml;type=entry",
data=metadata,
HTTP_SLUG=deposit.external_id,
HTTP_IN_PROGRESS=True,
)
assert response.status_code == status.HTTP_201_CREATED
return deposit
def test_read_metadata(
authenticated_client, deposit_collection, partial_deposit, atom_dataset
):
"""Private metadata read api to existing deposit should return metadata
"""
deposit = partial_deposit
deposit.external_id = "some-external-id"
deposit.save()
deposit = update_deposit(
authenticated_client, deposit_collection, deposit, atom_dataset
)
for url in private_get_raw_url_endpoints(deposit_collection, deposit):
response = authenticated_client.get(url)
assert response.status_code == status.HTTP_200_OK
assert response._headers["content-type"][1] == "application/json"
data = response.json()
expected_meta = {
"origin": {
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id",
},
"origin_metadata": {
"metadata": {
"@xmlns": ["http://www.w3.org/2005/Atom"],
"author": ["some awesome author", "another one", "no one"],
"codemeta:dateCreated": "2017-10-07T15:17:08Z",
"external_identifier": "some-external-id",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa
},
"provider": {
"metadata": {},
"provider_name": "",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
},
"tool": {
"configuration": {"sword_version": "2"},
"name": "swh-deposit",
"version": __version__,
},
},
"deposit": {
"author": SWH_PERSON,
"committer": SWH_PERSON,
"committer_date": {
"negative_utc": False,
"offset": 0,
"timestamp": {"microseconds": 0, "seconds": 1507389428},
},
"author_date": {
"negative_utc": False,
"offset": 0,
"timestamp": {"microseconds": 0, "seconds": 1507389428},
},
"client": "test",
"id": deposit.id,
"collection": "test",
"revision_parents": [],
},
}
assert data == expected_meta
def test_read_metadata_revision_with_parent(
    authenticated_client, deposit_collection, partial_deposit, atom_dataset
):
    """Private read metadata to a deposit (with parent) returns metadata
    """
    deposit = partial_deposit
    deposit.external_id = "some-external-id"
    deposit.save()
    deposit = update_deposit(
        authenticated_client, deposit_collection, deposit, atom_dataset
    )
    rev_id = "da78a9d4cf1d5d29873693fd496142e3a18c20fa"
    swh_id = "swh:1:rev:%s" % rev_id
    # forge a parent deposit pointing at a revision so the child's revision
    # metadata lists it in revision_parents
    fake_parent = Deposit(
        swh_id=swh_id, client=deposit.client, collection=deposit.collection
    )
    fake_parent.save()
    deposit.parent = fake_parent
    deposit.save()
    for url in private_get_raw_url_endpoints(deposit_collection, deposit):
        response = authenticated_client.get(url)
        assert response.status_code == status.HTTP_200_OK
        # fix: use the public header lookup; the private ``_headers`` mapping
        # was removed in Django 3.2
        assert response["content-type"] == "application/json"
        data = response.json()
        expected_meta = {
            "origin": {
                "type": "deposit",
                "url": "https://hal-test.archives-ouvertes.fr/some-external-id",
            },
            "origin_metadata": {
                "metadata": {
                    "@xmlns": ["http://www.w3.org/2005/Atom"],
                    "author": ["some awesome author", "another one", "no one"],
                    "codemeta:dateCreated": "2017-10-07T15:17:08Z",
                    "external_identifier": "some-external-id",
                    "url": "https://hal-test.archives-ouvertes.fr/some-external-id",  # noqa
                },
                "provider": {
                    "metadata": {},
                    "provider_name": "",
                    "provider_type": "deposit_client",
                    "provider_url": "https://hal-test.archives-ouvertes.fr/",
                },
                "tool": {
                    "configuration": {"sword_version": "2"},
                    "name": "swh-deposit",
                    "version": __version__,
                },
            },
            "deposit": {
                "author": SWH_PERSON,
                "committer": SWH_PERSON,
                "committer_date": {
                    "negative_utc": False,
                    "offset": 0,
                    "timestamp": {"microseconds": 0, "seconds": 1507389428},
                },
                "author_date": {
                    "negative_utc": False,
                    "offset": 0,
                    "timestamp": {"microseconds": 0, "seconds": 1507389428},
                },
                "client": "test",
                "id": deposit.id,
                "collection": "test",
                # the revision of the fake parent deposit shows up here
                "revision_parents": [rev_id],
            },
        }
        assert data == expected_meta
def test_read_metadata_3(
    authenticated_client, deposit_collection, partial_deposit, atom_dataset
):
    """date(Created|Published) provided, uses author/committer date
    """
    deposit = partial_deposit
    deposit.external_id = "hal-01243065"
    deposit.save()
    deposit = update_deposit(
        authenticated_client, deposit_collection, deposit, atom_dataset
    )
    # add metadata to the deposit with datePublished and dateCreated
    # (the XML elements below are asserted back out of the read endpoint as
    # codemeta:dateCreated / codemeta:datePublished)
    codemeta_entry_data = (
        atom_dataset["metadata"]
        % """
    <codemeta:dateCreated>2015-04-06T17:08:47+02:00</codemeta:dateCreated>
    <codemeta:datePublished>2017-05-03T16:08:47+02:00</codemeta:datePublished>
"""
    )
    update_deposit_with_metadata(
        authenticated_client, deposit_collection, deposit, codemeta_entry_data
    )
    for url in private_get_raw_url_endpoints(deposit_collection, deposit):
        response = authenticated_client.get(url)
        assert response.status_code == status.HTTP_200_OK
        # fix: use the public header lookup; the private ``_headers`` mapping
        # was removed in Django 3.2
        assert response["content-type"] == "application/json"
        data = response.json()
        metadata = {
            "@xmlns": ["http://www.w3.org/2005/Atom"],
            "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
            "author": [
                "some awesome author",
                "another one",
                "no one",
                {"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
            ],
            "client": "hal",
            "codemeta:applicationCategory": "test",
            "codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
            "codemeta:dateCreated": [
                "2017-10-07T15:17:08Z",
                "2015-04-06T17:08:47+02:00",
            ],
            "codemeta:datePublished": "2017-05-03T16:08:47+02:00",
            "codemeta:description": "this is the description",
            "codemeta:developmentStatus": "stable",
            "codemeta:keywords": "DSP programming",
            "codemeta:license": [
                {"codemeta:name": "GNU General Public License v3.0 only"},
                {
                    "codemeta:name": "CeCILL "
                    "Free "
                    "Software "
                    "License "
                    "Agreement "
                    "v1.1"
                },
            ],
            "codemeta:programmingLanguage": ["php", "python", "C"],
            "codemeta:runtimePlatform": "phpstorm",
            "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065",  # noqa
            "codemeta:version": "1",
            "external_identifier": ["some-external-id", "hal-01243065"],
            "id": "hal-01243065",
            "title": "Composing a Web of Audio Applications",
            "url": "https://hal-test.archives-ouvertes.fr/some-external-id",
        }
        expected_meta = {
            "origin": {
                "type": "deposit",
                "url": "https://hal-test.archives-ouvertes.fr/hal-01243065",
            },
            "origin_metadata": {
                "metadata": metadata,
                "provider": {
                    "metadata": {},
                    "provider_name": "",
                    "provider_type": "deposit_client",
                    "provider_url": "https://hal-test.archives-ouvertes.fr/",
                },
                "tool": {
                    "configuration": {"sword_version": "2"},
                    "name": "swh-deposit",
                    "version": __version__,
                },
            },
            "deposit": {
                "author": SWH_PERSON,
                "committer": SWH_PERSON,
                # committer date comes from datePublished (UTC+2)
                "committer_date": {
                    "negative_utc": False,
                    "offset": 120,
                    "timestamp": {"microseconds": 0, "seconds": 1493820527},
                },
                # author date comes from the first dateCreated
                "author_date": {
                    "negative_utc": False,
                    "offset": 0,
                    "timestamp": {"microseconds": 0, "seconds": 1507389428},
                },
                "client": deposit_collection.name,
                "id": deposit.id,
                "collection": deposit_collection.name,
                "revision_parents": [],
            },
        }
        assert data == expected_meta
def test_read_metadata_4(
    authenticated_client, deposit_collection, atom_dataset, partial_deposit
):
    """dateCreated/datePublished not provided, revision uses complete_date
    """
    deposit = partial_deposit
    codemeta_entry_data = atom_dataset["metadata"] % ""
    deposit = update_deposit_with_metadata(
        authenticated_client, deposit_collection, deposit, codemeta_entry_data
    )
    # will use the deposit completed date as fallback date
    deposit.complete_date = "2016-04-06"
    deposit.save()
    for url in private_get_raw_url_endpoints(deposit_collection, deposit):
        response = authenticated_client.get(url)
        assert response.status_code == status.HTTP_200_OK
        # fix: use the public header lookup; the private ``_headers`` mapping
        # was removed in Django 3.2
        assert response["content-type"] == "application/json"
        data = response.json()
        metadata = {
            "@xmlns": "http://www.w3.org/2005/Atom",
            "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
            "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
            "client": "hal",
            "codemeta:applicationCategory": "test",
            "codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
            "codemeta:description": "this is the description",
            "codemeta:developmentStatus": "stable",
            "codemeta:keywords": "DSP programming",
            "codemeta:license": [
                {
                    "codemeta:name": "GNU "
                    "General "
                    "Public "
                    "License "
                    "v3.0 "
                    "only"
                },
                {
                    "codemeta:name": "CeCILL "
                    "Free "
                    "Software "
                    "License "
                    "Agreement "
                    "v1.1"
                },
            ],
            "codemeta:programmingLanguage": ["php", "python", "C"],
            "codemeta:runtimePlatform": "phpstorm",
            "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065",
            "codemeta:version": "1",
            "external_identifier": "hal-01243065",
            "id": "hal-01243065",
            "title": "Composing a Web of Audio Applications",
        }
        expected_origin = {
            "type": "deposit",
            "url": "https://hal-test.archives-ouvertes.fr/%s" % (deposit.external_id),
        }
        expected_origin_metadata = {
            "metadata": metadata,
            "provider": {
                "metadata": {},
                "provider_name": "",
                "provider_type": "deposit_client",
                "provider_url": "https://hal-test.archives-ouvertes.fr/",
            },
            "tool": {
                "configuration": {"sword_version": "2"},
                "name": "swh-deposit",
                "version": __version__,
            },
        }
        # both dates fall back to complete_date (2016-04-06, midnight UTC)
        expected_deposit_info = {
            "author": SWH_PERSON,
            "committer": SWH_PERSON,
            "committer_date": {
                "negative_utc": False,
                "offset": 0,
                "timestamp": {"microseconds": 0, "seconds": 1459900800},
            },
            "author_date": {
                "negative_utc": False,
                "offset": 0,
                "timestamp": {"microseconds": 0, "seconds": 1459900800},
            },
            "client": deposit_collection.name,
            "id": deposit.id,
            "collection": deposit_collection.name,
            "revision_parents": [],
        }
        expected_meta = {
            "origin": expected_origin,
            "origin_metadata": expected_origin_metadata,
            "deposit": expected_deposit_info,
        }
        assert data == expected_meta
def test_read_metadata_5(
    authenticated_client, deposit_collection, partial_deposit, atom_dataset
):
    """dateCreated/datePublished provided, revision uses author/committer
    date
    If multiple dateCreated provided, the first occurrence (of
    dateCreated) is selected. If multiple datePublished provided,
    the first occurrence (of datePublished) is selected.
    """
    deposit = partial_deposit
    # add metadata to the deposit with multiple datePublished/dateCreated
    # (the XML elements below are asserted back out of the read endpoint)
    codemeta_entry_data = (
        atom_dataset["metadata"]
        % """
    <codemeta:dateCreated>2015-04-06T17:08:47+02:00</codemeta:dateCreated>
    <codemeta:datePublished>2017-05-03T16:08:47+02:00</codemeta:datePublished>
    <codemeta:dateCreated>2016-04-06T17:08:47+02:00</codemeta:dateCreated>
    <codemeta:datePublished>2018-05-03T16:08:47+02:00</codemeta:datePublished>
"""
    )
    deposit = update_deposit_with_metadata(
        authenticated_client, deposit_collection, deposit, codemeta_entry_data
    )
    for url in private_get_raw_url_endpoints(deposit_collection, deposit):
        response = authenticated_client.get(url)
        assert response.status_code == status.HTTP_200_OK
        # fix: use the public header lookup; the private ``_headers`` mapping
        # was removed in Django 3.2
        assert response["content-type"] == "application/json"
        data = response.json()
        expected_origin = {
            "type": "deposit",
            "url": "https://hal-test.archives-ouvertes.fr/external-id-partial",
        }
        metadata = {
            "@xmlns": "http://www.w3.org/2005/Atom",
            "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
            "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
            "client": "hal",
            "codemeta:applicationCategory": "test",
            "codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
            "codemeta:dateCreated": [
                "2015-04-06T17:08:47+02:00",
                "2016-04-06T17:08:47+02:00",
            ],
            "codemeta:datePublished": [
                "2017-05-03T16:08:47+02:00",
                "2018-05-03T16:08:47+02:00",
            ],
            "codemeta:description": "this is the description",
            "codemeta:developmentStatus": "stable",
            "codemeta:keywords": "DSP programming",
            "codemeta:license": [
                {
                    "codemeta:name": "GNU "
                    "General "
                    "Public "
                    "License "
                    "v3.0 "
                    "only"
                },
                {
                    "codemeta:name": "CeCILL "
                    "Free "
                    "Software "
                    "License "
                    "Agreement "
                    "v1.1"
                },
            ],
            "codemeta:programmingLanguage": ["php", "python", "C"],
            "codemeta:runtimePlatform": "phpstorm",
            "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065",  # noqa
            "codemeta:version": "1",
            "external_identifier": "hal-01243065",
            "id": "hal-01243065",
            "title": "Composing a Web of Audio Applications",
        }
        expected_origin_metadata = {
            "metadata": metadata,
            "provider": {
                "metadata": {},
                "provider_name": "",
                "provider_type": "deposit_client",
                "provider_url": "https://hal-test.archives-ouvertes.fr/",
            },
            "tool": {
                "configuration": {"sword_version": "2"},
                "name": "swh-deposit",
                "version": __version__,
            },
        }
        # committer date: first datePublished; author date: first dateCreated
        expected_deposit_info = {
            "author": SWH_PERSON,
            "committer": SWH_PERSON,
            "committer_date": {
                "negative_utc": False,
                "offset": 120,
                "timestamp": {"microseconds": 0, "seconds": 1493820527},
            },
            "author_date": {
                "negative_utc": False,
                "offset": 120,
                "timestamp": {"microseconds": 0, "seconds": 1428332927},
            },
            "client": deposit_collection.name,
            "id": deposit.id,
            "collection": deposit_collection.name,
            "revision_parents": [],
        }
        expected_meta = {
            "origin": expected_origin,
            "origin_metadata": expected_origin_metadata,
            "deposit": expected_deposit_info,
        }
        assert data == expected_meta
def test_access_to_nonexisting_deposit_returns_404_response(
    authenticated_client, deposit_collection,
):
    """Read on an unknown deposit should return a 404 response
    """
    unknown_id = 999
    # fix: the original try/except with ``assert True`` could never fail;
    # assert the deposit's absence explicitly instead
    assert not Deposit.objects.filter(pk=unknown_id).exists()
    for url in private_get_raw_url_endpoints(deposit_collection, unknown_id):
        response = authenticated_client.get(url)
        assert response.status_code == status.HTTP_404_NOT_FOUND
        msg = "Deposit with id %s does not exist" % unknown_id
        assert msg in response.content.decode("utf-8")
diff --git a/swh/deposit/tests/api/test_deposit_update.py b/swh/deposit/tests/api/test_deposit_update.py
index fd934d24..88402f07 100644
--- a/swh/deposit/tests/api/test_deposit_update.py
+++ b/swh/deposit/tests/api/test_deposit_update.py
@@ -1,563 +1,761 @@
-# Copyright (C) 2017-2019 The Software Heritage developers
+# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from io import BytesIO
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.urls import reverse
from rest_framework import status
from swh.deposit.config import (
DEPOSIT_STATUS_DEPOSITED,
DEPOSIT_STATUS_PARTIAL,
EDIT_SE_IRI,
EM_IRI,
)
from swh.deposit.models import Deposit, DepositCollection, DepositRequest
from swh.deposit.parsers import parse_xml
from swh.deposit.tests.common import check_archive, create_arborescence_archive
+from swh.model.hashutil import hash_to_bytes
+from swh.model.identifiers import parse_swhid, swhid
def test_replace_archive_to_deposit_is_possible(
    tmp_path,
    partial_deposit,
    deposit_collection,
    authenticated_client,
    sample_archive,
    atom_dataset,
):
    """Replace all archive with another one should return a 204 response
    """
    tmp_path = str(tmp_path)
    # given
    deposit = partial_deposit
    # the partial deposit fixture starts with exactly one archive request
    requests = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(list(requests)) == 1
    check_archive(sample_archive["name"], requests[0].archive.name)
    # we have no metadata for that deposit
    requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
    assert len(requests) == 0
    # add one metadata request so we can later verify the archive replacement
    # leaves it untouched
    response = authenticated_client.post(
        reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]),
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data1"],
        HTTP_SLUG=deposit.external_id,
        HTTP_IN_PROGRESS=True,
    )
    requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
    assert len(requests) == 1
    update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])
    external_id = "some-external-id-1"
    archive2 = create_arborescence_archive(
        tmp_path, "archive2", "file2", b"some other content in file"
    )
    # PUT on the EM-IRI replaces all existing archives with the uploaded one
    response = authenticated_client.put(
        update_uri,
        content_type="application/zip",  # as zip
        data=archive2["data"],
        # + headers
        CONTENT_LENGTH=archive2["length"],
        HTTP_SLUG=external_id,
        HTTP_CONTENT_MD5=archive2["md5sum"],
        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
        HTTP_IN_PROGRESS="false",
        HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],),
    )
    assert response.status_code == status.HTTP_204_NO_CONTENT
    # still a single archive request, now referencing the new archive
    requests = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(list(requests)) == 1
    check_archive(archive2["name"], requests[0].archive.name)
    # check we did not touch the other parts
    requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
    assert len(requests) == 1
def test_replace_metadata_to_deposit_is_possible(
    tmp_path,
    authenticated_client,
    partial_deposit_with_metadata,
    deposit_collection,
    atom_dataset,
):
    """Replace all metadata with another one should return a 204 response
    """
    # given
    deposit = partial_deposit_with_metadata
    raw_metadata0 = atom_dataset["entry-data0"] % deposit.external_id.encode("utf-8")
    # the fixture provides exactly one metadata request holding raw_metadata0
    requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(requests_meta) == 1
    request_meta0 = requests_meta[0]
    assert request_meta0.raw_metadata == raw_metadata0
    requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(requests_archive0) == 1
    # PUT on the SE-IRI replaces all existing metadata requests
    update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id])
    response = authenticated_client.put(
        update_uri,
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data1"],
    )
    assert response.status_code == status.HTTP_204_NO_CONTENT
    # still a single metadata request, now holding the new metadata
    requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(requests_meta) == 1
    request_meta1 = requests_meta[0]
    raw_metadata1 = request_meta1.raw_metadata
    assert raw_metadata1 == atom_dataset["entry-data1"]
    assert raw_metadata0 != raw_metadata1
    assert request_meta0 != request_meta1
    # check we did not touch the other parts
    requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(requests_archive1) == 1
    assert set(requests_archive0) == set(requests_archive1)
def test_add_archive_to_deposit_is_possible(
    tmp_path,
    authenticated_client,
    deposit_collection,
    partial_deposit_with_metadata,
    sample_archive,
):
    """Add another archive to a deposit return a 201 response
    """
    tmp_path = str(tmp_path)
    deposit = partial_deposit_with_metadata
    # the fixture provides one archive and one metadata request
    requests = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(requests) == 1
    check_archive(sample_archive["name"], requests[0].archive.name)
    requests_meta0 = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(requests_meta0) == 1
    update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])
    external_id = "some-external-id-1"
    archive2 = create_arborescence_archive(
        tmp_path, "archive2", "file2", b"some other content in file"
    )
    # POST on the EM-IRI adds an archive (contrast with PUT, which replaces)
    response = authenticated_client.post(
        update_uri,
        content_type="application/zip",  # as zip
        data=archive2["data"],
        # + headers
        CONTENT_LENGTH=archive2["length"],
        HTTP_SLUG=external_id,
        HTTP_CONTENT_MD5=archive2["md5sum"],
        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
        HTTP_IN_PROGRESS="false",
        HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],),
    )
    assert response.status_code == status.HTTP_201_CREATED
    requests = DepositRequest.objects.filter(deposit=deposit, type="archive").order_by(
        "id"
    )
    assert len(requests) == 2
    # first archive still exists
    check_archive(sample_archive["name"], requests[0].archive.name)
    # a new one was added
    check_archive(archive2["name"], requests[1].archive.name)
    # check we did not touch the other parts
    requests_meta1 = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(requests_meta1) == 1
    assert set(requests_meta0) == set(requests_meta1)
def test_add_metadata_to_deposit_is_possible(
    authenticated_client,
    deposit_collection,
    partial_deposit_with_metadata,
    atom_dataset,
):
    """Add metadata with another one should return a 201 response
    """
    deposit = partial_deposit_with_metadata
    requests = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(requests) == 1
    requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(requests_archive0) == 1
    update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id])
    atom_entry = atom_dataset["entry-data1"]
    # POST on the SE-IRI adds a metadata request (PUT would replace them all)
    response = authenticated_client.post(
        update_uri, content_type="application/atom+xml;type=entry", data=atom_entry
    )
    assert response.status_code == status.HTTP_201_CREATED
    requests = DepositRequest.objects.filter(deposit=deposit, type="metadata").order_by(
        "id"
    )
    assert len(requests) == 2
    expected_raw_meta0 = atom_dataset["entry-data0"] % (
        deposit.external_id.encode("utf-8")
    )
    # the original metadata is kept and a new one was added
    assert requests[0].raw_metadata == expected_raw_meta0
    assert requests[1].raw_metadata == atom_entry
    # check we did not touch the other parts
    requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(requests_archive1) == 1
    assert set(requests_archive0) == set(requests_archive1)
def test_add_both_archive_and_metadata_to_deposit(
    authenticated_client,
    deposit_collection,
    partial_deposit_with_metadata,
    atom_dataset,
    sample_archive,
):
    """Scenario: Add both a new archive and new metadata to a partial deposit is ok
    Response: 201
    """
    deposit = partial_deposit_with_metadata
    # the fixture provides one metadata and one archive request
    requests = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(requests) == 1
    requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(requests_archive0) == 1
    # build the multipart payload: a new archive and a new atom entry
    archive = InMemoryUploadedFile(
        BytesIO(sample_archive["data"]),
        field_name=sample_archive["name"],
        name=sample_archive["name"],
        content_type="application/x-tar",
        size=sample_archive["length"],
        charset=None,
    )
    data_atom_entry = atom_dataset["entry-data1"]
    atom_entry = InMemoryUploadedFile(
        BytesIO(data_atom_entry.encode("utf-8")),
        field_name="atom0",
        name="atom0",
        content_type='application/atom+xml; charset="utf-8"',
        size=len(data_atom_entry),
        charset="utf-8",
    )
    # fix: the update uri was computed twice in the original; once is enough
    update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id])
    response = authenticated_client.post(
        update_uri,
        format="multipart",
        data={"archive": archive, "atom_entry": atom_entry,},
    )
    assert response.status_code == status.HTTP_201_CREATED
    requests = DepositRequest.objects.filter(deposit=deposit, type="metadata").order_by(
        "id"
    )
    # fix: the two assertion messages were swapped (metadata count was
    # labelled "archive" and vice versa)
    assert len(requests) == 1 + 1, "New deposit request metadata got added"
    expected_raw_meta0 = atom_dataset["entry-data0"] % (
        deposit.external_id.encode("utf-8")
    )
    # the pre-existing metadata request is kept and a new one was added
    assert requests[0].raw_metadata == expected_raw_meta0
    assert requests[1].raw_metadata == data_atom_entry
    # check we did not touch the other parts
    requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(requests_archive1) == 1 + 1, "New deposit request archive got added"
def test_post_metadata_empty_post_finalize_deposit_ok(
    authenticated_client,
    deposit_collection,
    partial_deposit_with_metadata,
    atom_dataset,
):
    """An empty atom POST with In-Progress: false on a partial deposit
    transitions it to the 'deposited' status.
    Response: 200
    """
    deposit = partial_deposit_with_metadata
    # precondition: the deposit starts out partial
    assert deposit.status == DEPOSIT_STATUS_PARTIAL
    se_iri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id])
    result = authenticated_client.post(
        se_iri,
        content_type="application/atom+xml;type=entry",
        data="",
        size=0,
        HTTP_IN_PROGRESS=False,
    )
    assert result.status_code == status.HTTP_200_OK
    # reload from the db: the status must have moved to 'deposited'
    refreshed_deposit = Deposit.objects.get(pk=deposit.id)
    assert refreshed_deposit.status == DEPOSIT_STATUS_DEPOSITED
def test_add_metadata_to_unknown_deposit(
    deposit_collection, authenticated_client, atom_dataset
):
    """Adding metadata to an unknown deposit should return a 404 response
    """
    unknown_deposit_id = 1000
    # guard: no deposit must exist with this id (the except branch is the
    # expected path; ``assert True`` only documents that)
    try:
        Deposit.objects.get(pk=unknown_deposit_id)
    except Deposit.DoesNotExist:
        assert True
    # NOTE(review): the collection object itself (not ``.name``) is passed to
    # ``reverse``, so the URL holds the object's string representation and
    # the server answers "Unknown collection name", which is what is asserted
    # below -- confirm this is intentional
    url = reverse(EDIT_SE_IRI, args=[deposit_collection, unknown_deposit_id])
    response = authenticated_client.post(
        url,
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data1"],
    )
    assert response.status_code == status.HTTP_404_NOT_FOUND
    response_content = parse_xml(response.content)
    assert "Unknown collection name" in response_content["sword:error"]["summary"]
def test_add_metadata_to_unknown_collection(
    partial_deposit, authenticated_client, atom_dataset
):
    """Adding metadata to an unknown collection should return a 404 response
    """
    deposit = partial_deposit
    unknown_collection_name = "unknown-collection"
    # guard: no collection must exist with this name (the except branch is
    # the expected path)
    try:
        DepositCollection.objects.get(name=unknown_collection_name)
    except DepositCollection.DoesNotExist:
        assert True
    url = reverse(EDIT_SE_IRI, args=[unknown_collection_name, deposit.id])
    response = authenticated_client.post(
        url,
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data1"],
    )
    assert response.status_code == status.HTTP_404_NOT_FOUND
    response_content = parse_xml(response.content)
    assert "Unknown collection name" in response_content["sword:error"]["summary"]
def test_replace_metadata_to_unknown_deposit(
    authenticated_client, deposit_collection, atom_dataset
):
    """Replacing metadata on an unknown deposit should return a 404 response
    """
    unknown_deposit_id = 998
    # guard: no deposit must exist with this id (the except branch is the
    # expected path)
    try:
        Deposit.objects.get(pk=unknown_deposit_id)
    except Deposit.DoesNotExist:
        assert True
    url = reverse(EDIT_SE_IRI, args=[deposit_collection.name, unknown_deposit_id])
    response = authenticated_client.put(
        url,
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data1"],
    )
    assert response.status_code == status.HTTP_404_NOT_FOUND
    response_content = parse_xml(response.content)
    assert (
        "Deposit with id %s does not exist" % unknown_deposit_id
        == response_content["sword:error"]["summary"]
    )
def test_add_archive_to_unknown_deposit(
    authenticated_client, deposit_collection, atom_dataset
):
    """Adding an archive to an unknown deposit should return a 404 response
    """
    unknown_deposit_id = 997
    # guard: no deposit must exist with this id (the except branch is the
    # expected path)
    try:
        Deposit.objects.get(pk=unknown_deposit_id)
    except Deposit.DoesNotExist:
        assert True
    url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id])
    response = authenticated_client.post(
        url, content_type="application/zip", data=atom_dataset["entry-data1"]
    )
    assert response.status_code == status.HTTP_404_NOT_FOUND
    response_content = parse_xml(response.content)
    assert (
        "Deposit with id %s does not exist" % unknown_deposit_id
        == response_content["sword:error"]["summary"]
    )
def test_replace_archive_to_unknown_deposit(
    authenticated_client, deposit_collection, atom_dataset
):
    """Replacing archive to unknown deposit should return a 404 response
    """
    unknown_deposit_id = 996
    # guard: no deposit must exist with this id (the except branch is the
    # expected path)
    try:
        Deposit.objects.get(pk=unknown_deposit_id)
    except Deposit.DoesNotExist:
        assert True
    url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id])
    response = authenticated_client.put(
        url, content_type="application/zip", data=atom_dataset["entry-data1"]
    )
    assert response.status_code == status.HTTP_404_NOT_FOUND
    response_content = parse_xml(response.content)
    assert (
        "Deposit with id %s does not exist" % unknown_deposit_id
        == response_content["sword:error"]["summary"]
    )
def test_post_metadata_to_em_iri_failure(
    authenticated_client, deposit_collection, partial_deposit, atom_dataset
):
    """Update (POST) archive with wrong content type should return 400
    """
    deposit = partial_deposit
    update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])
    # the EM-IRI only accepts archive content types (zip / tar); any other
    # content type must be rejected
    response = authenticated_client.post(
        update_uri,
        content_type="application/x-gtar-compressed",
        data=atom_dataset["entry-data1"],
    )
    assert response.status_code == status.HTTP_400_BAD_REQUEST
    response_content = parse_xml(response.content)
    msg = (
        "Packaging format supported is restricted to "
        + "application/zip, application/x-tar"
    )
    assert msg == response_content["sword:error"]["summary"]
def test_put_metadata_to_em_iri_failure(
    authenticated_client, deposit_collection, partial_deposit, atom_dataset
):
    """Update (PUT) archive with wrong content type should return 400
    """
    # given
    deposit = partial_deposit
    # when: PUT an atom entry on the EM-IRI, which only accepts archive
    # content types (zip / tar)
    update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])
    response = authenticated_client.put(
        update_uri,
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data1"],
    )
    # then
    assert response.status_code == status.HTTP_400_BAD_REQUEST
    response_content = parse_xml(response.content)
    msg = (
        "Packaging format supported is restricted to "
        + "application/zip, application/x-tar"
    )
    assert msg == response_content["sword:error"]["summary"]
def test_put_update_metadata_and_archive_deposit_partial_nominal(
    tmp_path,
    authenticated_client,
    partial_deposit_with_metadata,
    deposit_collection,
    atom_dataset,
    sample_archive,
):
    """Scenario: Replace metadata and archive(s) with new ones should be ok
    Response: 204
    """
    # given
    deposit = partial_deposit_with_metadata
    raw_metadata0 = atom_dataset["entry-data0"] % deposit.external_id.encode("utf-8")
    # the fixture provides one metadata request (raw_metadata0) and one archive
    requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(requests_meta) == 1
    request_meta0 = requests_meta[0]
    assert request_meta0.raw_metadata == raw_metadata0
    requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(requests_archive0) == 1
    # build the multipart payload: a new archive ...
    archive = InMemoryUploadedFile(
        BytesIO(sample_archive["data"]),
        field_name=sample_archive["name"],
        name=sample_archive["name"],
        content_type="application/x-tar",
        size=sample_archive["length"],
        charset=None,
    )
    data_atom_entry = atom_dataset["entry-data1"]
    # ... and a new atom entry
    atom_entry = InMemoryUploadedFile(
        BytesIO(data_atom_entry.encode("utf-8")),
        field_name="atom0",
        name="atom0",
        content_type='application/atom+xml; charset="utf-8"',
        size=len(data_atom_entry),
        charset="utf-8",
    )
    # PUT on the SE-IRI replaces both the metadata and the archive(s)
    update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id])
    response = authenticated_client.put(
        update_uri,
        format="multipart",
        data={"archive": archive, "atom_entry": atom_entry,},
    )
    assert response.status_code == status.HTTP_204_NO_CONTENT
    # check we updated the metadata part
    requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(requests_meta) == 1
    request_meta1 = requests_meta[0]
    raw_metadata1 = request_meta1.raw_metadata
    assert raw_metadata1 == data_atom_entry
    assert raw_metadata0 != raw_metadata1
    assert request_meta0 != request_meta1
    # and the archive part
    requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(requests_archive1) == 1
    assert set(requests_archive0) != set(requests_archive1)
+
+
+def test_put_update_metadata_done_deposit_nominal(
+ tmp_path,
+ authenticated_client,
+ complete_deposit,
+ deposit_collection,
+ atom_dataset,
+ sample_data,
+ swh_storage,
+):
+ """Nominal scenario, client send an update of metadata on a deposit with status "done"
+ with an existing swhid. Such swhid has its metadata updated accordingly both in
+ the deposit backend and in the metadata storage.
+
+ Response: 204
+
+ """
+ deposit_swhid = parse_swhid(complete_deposit.swh_id)
+ assert deposit_swhid.object_type == "directory"
+ directory_id = hash_to_bytes(deposit_swhid.object_id)
+
+ # directory targeted by the complete_deposit does not exist in the storage
+ assert list(swh_storage.directory_missing([directory_id])) == [directory_id]
+
+ # so let's create a directory reference in the storage (current deposit targets an
+ # unknown swhid)
+ existing_directory = sample_data.directory
+ swh_storage.directory_add([existing_directory])
+ assert list(swh_storage.directory_missing([existing_directory.id])) == []
+
+ # and patch one complete deposit swh_id so it targets said reference
+ complete_deposit.swh_id = swhid("directory", existing_directory.id)
+ complete_deposit.save()
+
+ actual_existing_requests_archive = DepositRequest.objects.filter(
+ deposit=complete_deposit, type="archive"
+ )
+ nb_archives = len(actual_existing_requests_archive)
+ actual_existing_requests_metadata = DepositRequest.objects.filter(
+ deposit=complete_deposit, type="metadata"
+ )
+ nb_metadata = len(actual_existing_requests_metadata)
+
+ update_uri = reverse(
+ EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id]
+ )
+ response = authenticated_client.put(
+ update_uri,
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data1"],
+ HTTP_X_CHECK_SWHID=complete_deposit.swh_id,
+ )
+
+ assert response.status_code == status.HTTP_204_NO_CONTENT
+
+ new_requests_meta = DepositRequest.objects.filter(
+ deposit=complete_deposit, type="metadata"
+ )
+ assert len(new_requests_meta) == nb_metadata + 1
+ request_meta1 = new_requests_meta[0]
+ raw_metadata1 = request_meta1.raw_metadata
+ assert raw_metadata1 == atom_dataset["entry-data1"]
+
+ # check we did not touch the other parts
+ requests_archive1 = DepositRequest.objects.filter(
+ deposit=complete_deposit, type="archive"
+ )
+ assert len(requests_archive1) == nb_archives
+ assert set(actual_existing_requests_archive) == set(requests_archive1)
+
+ # FIXME: Check the metadata storage information created is consistent
+ pass
+
+
+def test_put_update_metadata_done_deposit_failure_swhid_unknown(
+ tmp_path,
+ authenticated_client,
+ complete_deposit,
+ deposit_collection,
+ atom_dataset,
+ swh_storage,
+):
+ """Failure: client updates metadata with a SWHID matching the deposit's. Said SWHID does
+ not exist in the archive somehow.
+
+ This should not happen though, it is still technically possible so it's
+ covered...
+
+ Response: 400
+
+ """
+ # directory targeted by the complete_deposit does not exist in the storage
+ missing_directory_id = hash_to_bytes(parse_swhid(complete_deposit.swh_id).object_id)
+ assert list(swh_storage.directory_missing([missing_directory_id])) == [
+ missing_directory_id
+ ]
+
+ update_uri = reverse(
+ EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id]
+ )
+
+ response = authenticated_client.put(
+ update_uri,
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data1"],
+ HTTP_X_CHECK_SWHID=complete_deposit.swh_id,
+ )
+
+ assert response.status_code == status.HTTP_400_BAD_REQUEST
+ assert b"Unknown directory SWHID" in response.content
+
+
+def test_put_update_metadata_done_deposit_failure_mismatched_swhid(
+ tmp_path,
+ authenticated_client,
+ complete_deposit,
+ deposit_collection,
+ atom_dataset,
+ swh_storage,
+):
+ """failure: client updates metadata on deposit with SWHID not matching the deposit's.
+
+ Response: 400
+
+ """
+ incorrect_swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"
+ assert complete_deposit.swh_id != incorrect_swhid
+
+ update_uri = reverse(
+ EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id]
+ )
+ response = authenticated_client.put(
+ update_uri,
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data1"],
+ HTTP_X_CHECK_SWHID=incorrect_swhid,
+ )
+
+ assert response.status_code == status.HTTP_400_BAD_REQUEST
+ assert b"Mismatched provided SWHID" in response.content
+
+
+def test_put_update_metadata_done_deposit_failure_malformed_xml(
+ tmp_path,
+ authenticated_client,
+ complete_deposit,
+ deposit_collection,
+ atom_dataset,
+ swh_storage,
+):
+ """failure: client updates metadata on deposit done with a malformed xml
+
+ Response: 400
+
+ """
+ update_uri = reverse(
+ EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id]
+ )
+ response = authenticated_client.put(
+ update_uri,
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data-ko"],
+ HTTP_X_CHECK_SWHID=complete_deposit.swh_id,
+ )
+
+ assert response.status_code == status.HTTP_400_BAD_REQUEST
+ assert b"Malformed xml metadata" in response.content
+
+
+def test_put_update_metadata_done_deposit_failure_empty_xml(
+ tmp_path,
+ authenticated_client,
+ complete_deposit,
+ deposit_collection,
+ atom_dataset,
+ swh_storage,
+):
+ """failure: client updates metadata on deposit done with an empty xml.
+
+ Response: 400
+
+ """
+ update_uri = reverse(
+ EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id]
+ )
+
+ response = authenticated_client.put(
+ update_uri,
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data-empty-body"],
+ HTTP_X_CHECK_SWHID=complete_deposit.swh_id,
+ )
+
+ assert response.status_code == status.HTTP_400_BAD_REQUEST
+ assert b"Empty body request is not supported" in response.content
diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py
index c92a6916..7ac74e94 100644
--- a/swh/deposit/tests/conftest.py
+++ b/swh/deposit/tests/conftest.py
@@ -1,418 +1,418 @@
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import base64
import os
from typing import Mapping
from django.test.utils import setup_databases # type: ignore
from django.urls import reverse
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
import pytest
from rest_framework import status
from rest_framework.test import APIClient
import yaml
+from swh.core.config import read
from swh.deposit.config import (
COL_IRI,
DEPOSIT_STATUS_DEPOSITED,
DEPOSIT_STATUS_LOAD_FAILURE,
DEPOSIT_STATUS_LOAD_SUCCESS,
DEPOSIT_STATUS_PARTIAL,
DEPOSIT_STATUS_REJECTED,
DEPOSIT_STATUS_VERIFIED,
EDIT_SE_IRI,
setup_django_for,
)
from swh.deposit.parsers import parse_xml
from swh.deposit.tests.common import create_arborescence_archive
from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, swhid
from swh.scheduler import get_scheduler
# mypy is asked to ignore the import statement above because setup_databases
# is not part of the d.t.utils.__all__ variable.
# Credentials and collection for the fake deposit client used throughout the
# test suite (consumed by the `deposit_user`, `authenticated_client` and
# `deposit_collection` fixtures below).
TEST_USER = {
    "username": "test",
    "password": "password",
    "email": "test@example.org",
    "provider_url": "https://hal-test.archives-ouvertes.fr/",
    "domain": "archives-ouvertes.fr/",
    "collection": {"name": "test"},
}
def pytest_configure():
    # Initialize the django application with the "testing" configuration
    # before any test module is imported.
    setup_django_for("testing")
@pytest.fixture()
def deposit_config(swh_scheduler_config, swh_storage_backend_config):
    """Minimal deposit server configuration used by the test suite."""
    config = {
        "max_upload_size": 500,
        "extraction_dir": "/tmp/swh-deposit/test/extraction-dir",
        "checks": False,
        "scheduler": {"cls": "local", "args": swh_scheduler_config},
        "storage_metadata": swh_storage_backend_config,
    }
    return config
@pytest.fixture()
def deposit_config_path(tmp_path, monkeypatch, deposit_config):
    """Dump `deposit_config` to a yaml file and expose it through the
    SWH_CONFIG_FILENAME environment variable; return the file path."""
    conf_path = os.path.join(tmp_path, "deposit.yml")
    with open(conf_path, "w") as fh:
        # yaml.dump with a stream argument writes the same serialization
        # as dumping to a string and writing it.
        yaml.dump(deposit_config, fh)
    monkeypatch.setenv("SWH_CONFIG_FILENAME", conf_path)
    return conf_path
@pytest.fixture(autouse=True)
def deposit_autoconfig(deposit_config_path, swh_scheduler_config):
    """Enforce config for deposit classes inherited from APIConfig.

    Autouse: reads back the yaml file written by `deposit_config_path` and,
    when a scheduler is configured, registers the load-deposit task type.
    """
    cfg = read(deposit_config_path)

    if "scheduler" in cfg:
        # scheduler setup: require the load-deposit tasks (already existing in
        # production)
        scheduler = get_scheduler(**cfg["scheduler"])
        task_type = {
            "type": "load-deposit",
            "backend_name": "swh.loader.packages.deposit.tasks.LoadDeposit",
            "description": "Load deposit task",
        }
        scheduler.create_task_type(task_type)
@pytest.fixture(scope="session")
def django_db_setup(request, django_db_blocker, postgresql_proc):
    """Session-scoped django test-database setup, bound to the pytest
    postgresql process fixture."""
    from django.conf import settings

    # Use a plain dict literal: the previous code passed a *set* of
    # (key, value) tuples, which dict.update happens to accept but is
    # error-prone (duplicates silently collapse) and harder to read.
    settings.DATABASES["default"].update(
        {
            "ENGINE": "django.db.backends.postgresql",
            "NAME": "tests",
            "USER": postgresql_proc.user,  # noqa
            "HOST": postgresql_proc.host,  # noqa
            "PORT": postgresql_proc.port,  # noqa
        }
    )
    with django_db_blocker.unblock():
        setup_databases(
            verbosity=request.config.option.verbose, interactive=False, keepdb=False
        )
def execute_sql(sql):
    """Execute the statement `sql` against the local "postgres" db.

    Runs with autocommit isolation so the statement takes effect immediately.
    Note: psycopg2's ``with connection`` block only ends the transaction, it
    does NOT close the connection — so close it explicitly to avoid leaking
    one server connection per call; the cursor is closed by its own
    context manager.
    """
    conn = psycopg2.connect(database="postgres")
    try:
        conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
        with conn.cursor() as cur:
            cur.execute(sql)
    finally:
        conn.close()
@pytest.fixture(autouse=True, scope="session")
def swh_proxy():
    """Point http(s) proxies at an unreachable local port for the whole
    session so no test can make an outside connection."""
    proxy = "http://localhost:999"
    os.environ["http_proxy"] = proxy
    os.environ["https_proxy"] = proxy
def create_deposit_collection(collection_name: str):
    """Return the DepositCollection named `collection_name`, creating and
    saving it on first use."""
    from swh.deposit.models import DepositCollection

    try:
        return DepositCollection._default_manager.get(name=collection_name)
    except DepositCollection.DoesNotExist:
        collection = DepositCollection(name=collection_name)
        collection.save()
        return collection
def deposit_collection_factory(collection_name=TEST_USER["collection"]["name"]):
    # Factory building a pytest fixture that returns (creating if needed)
    # the deposit collection named `collection_name`.
    @pytest.fixture
    def _deposit_collection(db, collection_name=collection_name):
        # `db` enables database access; the collection name is bound at
        # factory-call time via the default argument.
        return create_deposit_collection(collection_name)

    return _deposit_collection
# Default collection (TEST_USER's) plus a secondary one for
# cross-collection tests.
deposit_collection = deposit_collection_factory()
deposit_another_collection = deposit_collection_factory("another-collection")
@pytest.fixture
def deposit_user(db, deposit_collection):
    """Return (creating it on first use) the "test" deposit client."""
    from swh.deposit.models import DepositClient

    manager = DepositClient._default_manager
    try:
        user = manager.get(username=TEST_USER["username"])
    except DepositClient.DoesNotExist:
        user = manager.create_user(
            username=TEST_USER["username"],
            email=TEST_USER["email"],
            password=TEST_USER["password"],
            provider_url=TEST_USER["provider_url"],
            domain=TEST_USER["domain"],
        )
    # (Re)attach the user to the test collection on every use.
    user.collections = [deposit_collection.id]
    user.save()
    return user
@pytest.fixture
def client():
    """Override pytest-django's `client` fixture, which does not work for
    djangorestframework; return DRF's test client instead.
    """
    return APIClient()  # <- drf's client
@pytest.fixture
def authenticated_client(client, deposit_user):
    """Yield the API client with TEST_USER's basic-auth credentials set,
    logging it out on teardown.

    Note: ``pytest.yield_fixture`` is deprecated (and removed in recent
    pytest releases); plain ``pytest.fixture`` supports yield-style
    fixtures identically.
    """
    _token = "%s:%s" % (deposit_user.username, TEST_USER["password"])
    token = base64.b64encode(_token.encode("utf-8"))
    authorization = "Basic %s" % token.decode("utf-8")
    client.credentials(HTTP_AUTHORIZATION=authorization)
    yield client
    client.logout()
@pytest.fixture
def sample_archive(tmp_path):
    """Returns a sample archive
    """
    # pytest version limitation in previous version: coerce to str before
    # handing the path to the archive helper.
    return create_arborescence_archive(
        str(tmp_path), "archive1", "file1", b"some content in file"
    )
@pytest.fixture
def atom_dataset(datadir) -> Mapping[str, str]:
    """Compute the paths to atom files.

    Returns:
        Dict of atom name per content (bytes)
    """
    atom_dir = os.path.join(datadir, "atom")
    entries = {}
    for name in os.listdir(atom_dir):
        with open(os.path.join(atom_dir, name), "rb") as fh:
            # Key by the filename up to its first dot (extension dropped).
            entries[name.split(".")[0]] = fh.read().decode("utf-8")
    return entries
def create_deposit(
    authenticated_client,
    collection_name: str,
    sample_archive,
    external_id: str,
    deposit_status=DEPOSIT_STATUS_DEPOSITED,
):
    """Create a skeleton deposit through the COL-IRI, then force its status
    to `deposit_status` and return the Deposit model instance."""
    # when
    response = authenticated_client.post(
        reverse(COL_IRI, args=[collection_name]),
        content_type="application/zip",  # as zip
        data=sample_archive["data"],
        # + headers
        CONTENT_LENGTH=sample_archive["length"],
        HTTP_SLUG=external_id,
        HTTP_CONTENT_MD5=sample_archive["md5sum"],
        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
        HTTP_IN_PROGRESS="false",
        HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (sample_archive["name"]),
    )
    # then
    assert response.status_code == status.HTTP_201_CREATED

    from swh.deposit.models import Deposit

    deposit = Deposit._default_manager.get(external_id=external_id)
    if deposit.status != deposit_status:
        deposit.status = deposit_status
        deposit.save()
    assert deposit.status == deposit_status
    return deposit
def create_binary_deposit(
    authenticated_client,
    collection_name: str,
    sample_archive,
    external_id: str,
    deposit_status: str = DEPOSIT_STATUS_DEPOSITED,
    atom_dataset: Mapping[str, str] = {},
):
    """Create a deposit with both metadata and archive set. Then alters its status
    to `deposit_status`.

    Args:
        authenticated_client: client with TEST_USER credentials set
        collection_name: target deposit collection
        sample_archive: archive dict (data/length/md5sum/name) posted first
        external_id: external identifier for the deposit
        deposit_status: final status forced on the deposit
        atom_dataset: atom entry templates keyed by basename (str values;
            the default {} is never mutated, so the shared-mutable-default
            pitfall does not apply)

    Returns:
        The Deposit model instance, with status `deposit_status`.
    """
    deposit = create_deposit(
        authenticated_client,
        collection_name,
        sample_archive,
        external_id=external_id,
        deposit_status=DEPOSIT_STATUS_PARTIAL,
    )

    response = authenticated_client.post(
        reverse(EDIT_SE_IRI, args=[collection_name, deposit.id]),
        content_type="application/atom+xml;type=entry",
        # atom_dataset values are str (see the `atom_dataset` fixture), so
        # interpolate the external id as str: the previous code interpolated
        # external_id.encode("utf-8"), which embeds the bytes repr "b'...'"
        # into the atom entry.
        data=atom_dataset["entry-data0"] % deposit.external_id,
        HTTP_SLUG=deposit.external_id,
        HTTP_IN_PROGRESS="true",
    )

    assert response.status_code == status.HTTP_201_CREATED
    assert deposit.status == DEPOSIT_STATUS_PARTIAL

    from swh.deposit.models import Deposit

    deposit = Deposit._default_manager.get(pk=deposit.id)
    if deposit.status != deposit_status:
        deposit.status = deposit_status
        deposit.save()
    assert deposit.status == deposit_status
    return deposit
def deposit_factory(deposit_status=DEPOSIT_STATUS_DEPOSITED):
    """Build a fixture creating a deposit whose status is `deposit_status`."""

    @pytest.fixture()
    def _deposit(
        sample_archive,
        deposit_collection,
        authenticated_client,
        deposit_status=deposit_status,
    ):
        return create_deposit(
            authenticated_client,
            deposit_collection.name,
            sample_archive,
            # one distinct external id per status
            external_id="external-id-%s" % deposit_status,
            deposit_status=deposit_status,
        )

    return _deposit
# One ready-made deposit fixture per lifecycle status.
deposited_deposit = deposit_factory()
rejected_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_REJECTED)
partial_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_PARTIAL)
verified_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_VERIFIED)
completed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS)
failed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_FAILURE)
@pytest.fixture
def partial_deposit_with_metadata(
    sample_archive, deposit_collection, authenticated_client, atom_dataset
):
    """Returns deposit with archive and metadata provided, status 'partial'
    """
    # Delegates to create_binary_deposit: posts the archive first, then the
    # atom metadata entry, and leaves the deposit in 'partial' status.
    return create_binary_deposit(
        authenticated_client,
        deposit_collection.name,
        sample_archive,
        external_id="external-id-partial",
        deposit_status=DEPOSIT_STATUS_PARTIAL,
        atom_dataset=atom_dataset,
    )
@pytest.fixture
def partial_deposit_only_metadata(
    deposit_collection, authenticated_client, atom_dataset
):
    """Return an in-progress ('partial') deposit created with metadata only
    (no archive)."""
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data1"],
        HTTP_SLUG="external-id-partial",
        HTTP_IN_PROGRESS=True,
    )
    assert response.status_code == status.HTTP_201_CREATED

    deposit_id = parse_xml(response.content)["deposit_id"]

    from swh.deposit.models import Deposit

    deposit = Deposit._default_manager.get(pk=deposit_id)
    assert deposit.status == DEPOSIT_STATUS_PARTIAL
    return deposit
@pytest.fixture
def complete_deposit(sample_archive, deposit_collection, authenticated_client):
    """Returns a completed deposit (load success)
    """
    deposit = create_deposit(
        authenticated_client,
        deposit_collection.name,
        sample_archive,
        external_id="external-id-complete",
        deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS,
    )
    # Fill in the identifiers a successful load would have recorded.
    directory_id = "42a13fc721c8716ff695d0d62fc851d641f3a12b"
    qualifiers = {
        "origin": "https://hal.archives-ouvertes.fr/hal-01727745",
        "visit": swhid(SNAPSHOT, "e5e82d064a9c3df7464223042e0c55d72ccff7f0"),
        "anchor": swhid(REVISION, "548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10"),
        "path": "/",
    }
    deposit.swh_id = swhid(DIRECTORY, directory_id)
    deposit.swh_id_context = swhid(DIRECTORY, directory_id, metadata=qualifiers)
    deposit.save()
    return deposit
@pytest.fixture()
def tmp_path(tmp_path):
    # Override pytest's tmp_path to hand out a plain str, since the helpers
    # here expect string paths.
    return str(tmp_path)  # issue with oldstable's pytest version
diff --git a/swh/deposit/tests/test_init.py b/swh/deposit/tests/test_init.py
index 88fca573..59a145a1 100644
--- a/swh/deposit/tests/test_init.py
+++ b/swh/deposit/tests/test_init.py
@@ -1,10 +1,10 @@
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
def test_version():
    # The top-level swh.deposit package must expose a __version__ attribute
    # (set from package metadata, or "devel" when not installed).
    from swh.deposit import __version__

    assert __version__ is not None