diff --git a/docs/specs/swh.xsd b/docs/specs/swh.xsd
index d99ca9fd..119d83d1 100644
--- a/docs/specs/swh.xsd
+++ b/docs/specs/swh.xsd
@@ -1,58 +1,65 @@
+
+
+
+
+
+
+
diff --git a/swh/deposit/api/collection.py b/swh/deposit/api/collection.py
index fb7c2a61..47f3d291 100644
--- a/swh/deposit/api/collection.py
+++ b/swh/deposit/api/collection.py
@@ -1,134 +1,135 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Optional, Tuple
from rest_framework import status
from ..config import DEPOSIT_STATUS_LOAD_SUCCESS, EDIT_IRI
from ..models import Deposit
from ..parsers import (
SWHAtomEntryParser,
SWHFileUploadTarParser,
SWHFileUploadZipParser,
SWHMultiPartParser,
)
from .common import (
ACCEPT_ARCHIVE_CONTENT_TYPES,
APIPost,
ParsedRequestHeaders,
Receipt,
get_collection_by_name,
)
class CollectionAPI(APIPost):
    """SWORD 'Col-IRI' endpoint: creates a new deposit in a collection.

    HTTP verbs supported: POST

    """

    parser_classes = (
        SWHMultiPartParser,
        SWHFileUploadZipParser,
        SWHFileUploadTarParser,
        SWHAtomEntryParser,
    )

    def process_post(
        self,
        req,
        headers: ParsedRequestHeaders,
        collection_name: str,
        deposit: Optional[Deposit] = None,
    ) -> Tuple[int, str, Receipt]:
        """Create a first deposit from the request body.

        The request content type selects the upload routine:

        - one of ``ACCEPT_ARCHIVE_CONTENT_TYPES``: archive deposit
        - ``multipart/*``: multipart deposit (archive + atom entry)
        - anything else: atom entry deposit

        Args:
            req (Request): the request holding the information to parse
                and inject in db
            headers: parsed request headers; the slug is used as external id
            collection_name (str): name of the collection deposited into
            deposit: must be None, this endpoint only creates deposits

        Returns:
            Tuple of (201 created status code, EDIT_IRI key, deposit receipt).

        Raises:
            Whatever the selected upload routine raises (see
            ``_binary_upload``, ``_multipart_upload`` and ``_atom_entry``).

        """
        assert deposit is None

        new_deposit = self._deposit_create(
            req, collection_name, external_id=headers.slug
        )

        # Pick the upload routine first, then call it once.
        if req.content_type in ACCEPT_ARCHIVE_CONTENT_TYPES:
            upload = self._binary_upload
        elif req.content_type.startswith("multipart/"):
            upload = self._multipart_upload
        else:
            upload = self._atom_entry

        receipt = upload(req, headers, collection_name, new_deposit)
        return status.HTTP_201_CREATED, EDIT_IRI, receipt

    def _deposit_create(
        self, request, collection_name: str, external_id: Optional[str]
    ) -> Deposit:
        """Build (without saving) a new Deposit for the requesting client.

        When an external id is given, the most recent successfully loaded
        deposit of the same client with that external id becomes the parent.

        """
        collection = get_collection_by_name(collection_name)
        client = self.get_client(request)

        parent: Optional[Deposit] = None
        if external_id:
            # TODO: delete this when clients stopped relying on the slug
            # find a deposit parent (same external id, status load to success);
            # stays None when there is no such deposit
            parent = (
                Deposit.objects.filter(
                    client=client,
                    external_id=external_id,
                    status=DEPOSIT_STATUS_LOAD_SUCCESS,
                )
                .order_by("-id")
                .first()
            )

        return Deposit(
            collection=collection,
            external_id=external_id or "",
            client=client,
            parent=parent,
        )
diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
index c6e95026..23a22891 100644
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -1,1163 +1,1193 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from abc import ABCMeta, abstractmethod
import datetime
import hashlib
import json
from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union
import uuid
import attr
from django.core.files.uploadedfile import UploadedFile
from django.http import FileResponse, HttpResponse
from django.shortcuts import render
from django.urls import reverse
from django.utils import timezone
from rest_framework import status
from rest_framework.authentication import BaseAuthentication, BasicAuthentication
from rest_framework.permissions import BasePermission, IsAuthenticated
from rest_framework.request import Request
from rest_framework.views import APIView
from swh.deposit.api.checks import check_metadata
from swh.deposit.api.converters import convert_status_detail
from swh.deposit.models import Deposit
from swh.deposit.utils import compute_metadata_context
from swh.model import hashutil
from swh.model.identifiers import SWHID, ValidationError
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
RawExtrinsicMetadata,
)
from swh.scheduler.utils import create_oneshot_task_dict
from ..config import (
ARCHIVE_KEY,
ARCHIVE_TYPE,
CONT_FILE_IRI,
DEPOSIT_STATUS_DEPOSITED,
DEPOSIT_STATUS_LOAD_SUCCESS,
DEPOSIT_STATUS_PARTIAL,
EDIT_IRI,
EM_IRI,
METADATA_KEY,
METADATA_TYPE,
RAW_METADATA_KEY,
SE_IRI,
STATE_IRI,
APIConfig,
)
from ..errors import (
BAD_REQUEST,
CHECKSUM_MISMATCH,
ERROR_CONTENT,
FORBIDDEN,
MAX_UPLOAD_SIZE_EXCEEDED,
MEDIATION_NOT_ALLOWED,
METHOD_NOT_ALLOWED,
NOT_FOUND,
PARSING_ERROR,
DepositError,
ParserError,
)
from ..models import DepositClient, DepositCollection, DepositRequest
from ..parsers import parse_swh_reference, parse_xml
# Packaging identifiers accepted in the Packaging header of archive deposits
ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"]
# Request content types handled as a binary archive upload
ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"]
@attr.s
class ParsedRequestHeaders:
"""Normalized request headers, as built by `APIBase._read_headers`."""
# request.content_type
content_type = attr.ib(type=str)
# CONTENT_LENGTH, converted to int when sent as a non-empty string
content_length = attr.ib(type=Optional[int])
# HTTP_IN_PROGRESS parsed as a boolean; False when the header is absent
in_progress = attr.ib(type=bool)
# HTTP_CONTENT_DISPOSITION, verbatim
content_disposition = attr.ib(type=Optional[str])
# HTTP_CONTENT_MD5 decoded from its hexadecimal form
content_md5sum = attr.ib(type=Optional[bytes])
# HTTP_PACKAGING, verbatim
packaging = attr.ib(type=Optional[str])
# HTTP_SLUG, verbatim
slug = attr.ib(type=Optional[str])
# HTTP_ON_BEHALF_OF, verbatim
on_behalf_of = attr.ib(type=Optional[str])
# HTTP_METADATA_RELEVANT, verbatim
metadata_relevant = attr.ib(type=Optional[str])
# HTTP_X_CHECK_SWHID, verbatim
swhid = attr.ib(type=Optional[str])
@attr.s
class Receipt:
"""Data computed while handling the request body that will be served in the
Deposit Receipt."""
# id of the deposit the receipt is about
deposit_id = attr.ib(type=int)
# date reported in the receipt (reception, completion or request date,
# depending on the routine that built the receipt)
deposit_date = attr.ib(type=datetime.datetime)
# deposit status at the time the receipt is built
status = attr.ib(type=str)
# name of the uploaded archive, None when the request carried no archive
archive = attr.ib(type=Optional[str])
def _compute_md5(filehandler: UploadedFile) -> bytes:
    """Return the raw md5 digest of everything yielded by iterating the file."""
    digest = hashlib.md5()
    for piece in filehandler:
        digest.update(piece)  # type: ignore
    return digest.digest()
def get_deposit_by_id(
    deposit_id: int, collection_name: Optional[str] = None
) -> Deposit:
    """Fetch the Deposit with the given id, or raise `DepositError` (NOT_FOUND).

    When `collection_name` is given, the deposit must belong to that
    collection; an unknown collection name is reported first.

    """
    try:
        found = Deposit.objects.get(pk=deposit_id)
    except Deposit.DoesNotExist:
        raise DepositError(NOT_FOUND, f"Deposit {deposit_id} does not exist")

    if not collection_name or found.collection.name == collection_name:
        return found

    get_collection_by_name(collection_name)  # raises if does not exist
    raise DepositError(
        NOT_FOUND,
        f"Deposit {deposit_id} does not belong to collection {collection_name}",
    )
def get_collection_by_name(collection_name: str):
    """Fetch the DepositCollection with that name, or raise `DepositError`
    (NOT_FOUND) when there is none."""
    try:
        found = DepositCollection.objects.get(name=collection_name)
    except DepositCollection.DoesNotExist:
        raise DepositError(NOT_FOUND, f"Unknown collection name {collection_name}")
    else:
        assert found is not None
        return found
def guess_deposit_origin_url(deposit: Deposit):
    """Build an origin url from the client's provider url and the deposit's
    external id."""
    # The client provided neither an origin_url nor a slug. That's inconvenient,
    # but SWORD requires we support it. So a random slug is generated instead.
    slug = deposit.external_id or str(uuid.uuid4())
    base_url = deposit.client.provider_url.rstrip("/")
    return f"{base_url}/{slug}"
def check_client_origin(client: DepositClient, origin_url: str):
    """Raise `DepositError` (FORBIDDEN) unless `origin_url` starts with the
    client's provider url (normalized to end with exactly one slash)."""
    required_prefix = client.provider_url.rstrip("/") + "/"
    if origin_url.startswith(required_prefix):
        return
    message = (
        f"Cannot create origin {origin_url}, it must start with {required_prefix}"
    )
    raise DepositError(FORBIDDEN, message)
+
+
class AuthenticatedAPIView(APIView):
"""Base API view that enforces the basic authentication check: requests
are authenticated with `BasicAuthentication` and must satisfy the
`IsAuthenticated` permission.
"""
# authentication backends applied to every request on the view
authentication_classes: Sequence[Type[BaseAuthentication]] = (BasicAuthentication,)
# permissions every request must satisfy
permission_classes: Sequence[Type[BasePermission]] = (IsAuthenticated,)
class APIBase(APIConfig, AuthenticatedAPIView, metaclass=ABCMeta):
"""Base deposit request class sharing multiple common behaviors.
"""
_client: Optional[DepositClient] = None
def _read_headers(self, request: Request) -> ParsedRequestHeaders:
    """Read and unify the necessary headers from the request (those are
    not stored in the same location or not properly formatted).

    Args:
        request: Input request

    Returns:
        A ParsedRequestHeaders; any field but ``content_type`` and
        ``in_progress`` may be None when the header was not sent.

    Raises:
        DepositError (BAD_REQUEST) if the CONTENT_LENGTH header is not an
        integer or the CONTENT_MD5 header is not hexadecimal.

    """
    meta = request._request.META

    content_length = meta.get("CONTENT_LENGTH")
    if isinstance(content_length, str):
        if content_length:
            try:
                content_length = int(content_length)
            except ValueError:
                # malformed client input must be a client error, not a crash
                raise DepositError(
                    BAD_REQUEST,
                    "Invalid CONTENT_LENGTH header",
                    f"The CONTENT_LENGTH header must be an integer, "
                    f"got {content_length!r}.",
                )
        else:
            # an empty header is the same as no header at all
            content_length = None

    # final deposit if not provided
    in_progress = meta.get("HTTP_IN_PROGRESS", False)
    if isinstance(in_progress, str):
        in_progress = in_progress.lower() == "true"

    content_md5sum = meta.get("HTTP_CONTENT_MD5")
    if content_md5sum:
        try:
            content_md5sum = bytes.fromhex(content_md5sum)
        except ValueError:
            raise DepositError(
                BAD_REQUEST,
                "Invalid CONTENT_MD5 header",
                "The CONTENT_MD5 header must be the hexadecimal md5 digest "
                "of the archive.",
            )

    return ParsedRequestHeaders(
        content_type=request.content_type,
        content_length=content_length,
        in_progress=in_progress,
        content_disposition=meta.get("HTTP_CONTENT_DISPOSITION"),
        content_md5sum=content_md5sum,
        packaging=meta.get("HTTP_PACKAGING"),
        slug=meta.get("HTTP_SLUG"),
        on_behalf_of=meta.get("HTTP_ON_BEHALF_OF"),
        metadata_relevant=meta.get("HTTP_METADATA_RELEVANT"),
        swhid=meta.get("HTTP_X_CHECK_SWHID"),
    )
def _deposit_put(self, deposit: Deposit, in_progress: bool = False) -> None:
"""Save/Update a deposit in db.

A deposit that is not in progress is completed through
`_complete_deposit`; otherwise its status is set to partial.

Args:
deposit: deposit being updated/created
in_progress: whether the client announced more requests for this deposit
"""
# NOTE(review): indentation was lost in this view; whether the final
# save() belongs to the else branch only cannot be told from here.
if in_progress is False:
self._complete_deposit(deposit)
else:
deposit.status = DEPOSIT_STATUS_PARTIAL
deposit.save()
def _complete_deposit(self, deposit: Deposit) -> None:
"""Marks the deposit as 'deposited', then schedule a check task if configured
to do so.

A deposit without origin url gets one from `guess_deposit_origin_url`.
"""
deposit.complete_date = timezone.now()
deposit.status = DEPOSIT_STATUS_DEPOSITED
# saved before scheduling: the task below is created with deposit.id
deposit.save()
if not deposit.origin_url:
deposit.origin_url = guess_deposit_origin_url(deposit)
if self.config["checks"]:
scheduler = self.scheduler
# only schedule a check once per deposit
if deposit.status == DEPOSIT_STATUS_DEPOSITED and not deposit.check_task_id:
task = create_oneshot_task_dict(
"check-deposit",
collection=deposit.collection.name,
deposit_id=deposit.id,
retries_left=3,
)
check_task_id = scheduler.create_tasks([task])[0]["id"]
deposit.check_task_id = check_task_id
# NOTE(review): indentation was lost in this view; confirm this save() runs
# even when checks are disabled, otherwise the guessed origin_url is not stored.
deposit.save()
def _deposit_request_put(
    self,
    deposit: Deposit,
    deposit_request_data: Dict[str, Any],
    replace_metadata: bool = False,
    replace_archives: bool = False,
) -> DepositRequest:
    """Store the archive and/or metadata of a request as DepositRequest rows.

    Args:
        deposit: The deposit concerned by the request
        deposit_request_data: Dictionary holding an archive (ARCHIVE_KEY)
          and/or parsed metadata (METADATA_KEY, with the raw bytes under
          RAW_METADATA_KEY)
        replace_metadata: when True, existing metadata requests of the
          deposit are deleted first
        replace_archives: when True, existing archive requests of the
          deposit are deleted first

    Returns:
        the last DepositRequest object stored in the backend

    """
    # Drop what the caller asked to replace, metadata first.
    for must_replace, request_type in (
        (replace_metadata, METADATA_TYPE),
        (replace_archives, ARCHIVE_TYPE),
    ):
        if must_replace:
            DepositRequest.objects.filter(deposit=deposit, type=request_type).delete()

    stored = None

    archive = deposit_request_data.get(ARCHIVE_KEY)
    if archive:
        stored = DepositRequest(type=ARCHIVE_TYPE, deposit=deposit, archive=archive)
        stored.save()

    parsed_metadata = deposit_request_data.get(METADATA_KEY)
    if parsed_metadata:
        raw = deposit_request_data[RAW_METADATA_KEY]
        stored = DepositRequest(
            type=METADATA_TYPE,
            deposit=deposit,
            metadata=parsed_metadata,
            raw_metadata=raw.decode("utf-8"),
        )
        stored.save()

    # callers always provide at least one of the two
    assert stored is not None
    return stored
def _delete_archives(self, collection_name: str, deposit: Deposit) -> Dict:
    """Remove every archive request attached to the deposit.

    Returns:
        An empty dict.

    """
    archive_requests = DepositRequest.objects.filter(
        deposit=deposit, type=ARCHIVE_TYPE
    )
    archive_requests.delete()
    return {}
def _delete_deposit(self, collection_name: str, deposit: Deposit) -> Dict:
    """Delete the deposit and all of its requests.

    Args:
        collection_name: Client's collection
        deposit: The deposit to delete

    Returns:
        Empty dict when ok.

    Raises:
        DepositError (BAD_REQUEST) if the deposit belongs to another
        collection.

    """
    if deposit.collection.name != collection_name:
        raise DepositError(
            BAD_REQUEST,
            summary="Cannot delete a deposit from another collection",
            verbose_description=(
                f"Deposit {deposit.id} does not belong to "
                f"the collection {collection_name}"
            ),
        )

    # requests first, then the deposit itself
    DepositRequest.objects.filter(deposit=deposit).delete()
    deposit.delete()
    return {}
def _check_file_length(
    self, filehandler: UploadedFile, content_length: Optional[int] = None,
) -> None:
    """Check the size of the uploaded file.

    The size must equal `content_length` when one is provided, and must
    never exceed the configured ``max_upload_size``.

    Args:
        filehandler: The file to check
        content_length: the expected length if provided.

    Raises:
        DepositError if the actual length does not match the expected one
        or exceeds the configured maximum

    """
    max_upload_size = self.config["max_upload_size"]
    actual_size = filehandler.size

    if content_length and actual_size != content_length:
        # NOTE(review): every other DepositError in this file is keyed by a
        # constant from ..errors; confirm a bare HTTP status code is accepted.
        raise DepositError(status.HTTP_412_PRECONDITION_FAILED, "Wrong length")

    if actual_size > max_upload_size:
        raise DepositError(
            MAX_UPLOAD_SIZE_EXCEEDED,
            # trailing space keeps the two sentences apart once concatenated
            f"Upload size limit exceeded (max {max_upload_size} bytes). "
            "Please consider sending the archive in multiple steps.",
        )
def _check_file_md5sum(
    self, filehandler: UploadedFile, md5sum: Optional[bytes],
) -> None:
    """Compare the md5 digest of the file with the one the client announced.

    Nothing is checked when no md5sum is provided.

    Args:
        filehandler: The file to check
        md5sum: md5 hash expected from the file's content

    Raises:
        DepositError (CHECKSUM_MISMATCH) if the md5sum does not match

    """
    if not md5sum:
        return

    computed = _compute_md5(filehandler)
    if computed == md5sum:
        return

    sent_hex = hashutil.hash_to_hex(md5sum)
    computed_hex = hashutil.hash_to_hex(computed)
    raise DepositError(
        CHECKSUM_MISMATCH,
        "Wrong md5 hash",
        f"The checksum sent {sent_hex} and the actual "
        f"checksum {computed_hex} does not match.",
    )
def _binary_upload(
    self,
    request: Request,
    headers: ParsedRequestHeaders,
    collection_name: str,
    deposit: Deposit,
    replace_metadata: bool = False,
    replace_archives: bool = False,
) -> Receipt:
    """Archive-only upload routine.

    Args:
        request: the request holding information to parse
            and inject in db
        headers: parsed request headers
        collection_name: name of the collection deposited into
        deposit: deposit to be updated
        replace_metadata: if True, existing metadata requests of the
            deposit are replaced; if False (default) they are kept.
        replace_archives: if True, existing archive requests of the
            deposit are replaced; if False (default) the new archive is
            added to them.

    Returns:
        The receipt for the deposit, naming the uploaded archive.

    Raises:
        DepositError:
        - BAD_REQUEST if the content-length or content-disposition header
          is missing, or the packaging is not supported
        - if the archive length or md5 hash mismatch the announced ones,
          or the archive exceeds the configured max size (see
          ``_check_file_length`` and ``_check_file_md5sum``)

    """
    announced_length = headers.content_length
    if not announced_length:
        raise DepositError(
            BAD_REQUEST,
            "CONTENT_LENGTH header is mandatory",
            "For archive deposit, the CONTENT_LENGTH header must be sent.",
        )

    if not headers.content_disposition:
        raise DepositError(
            BAD_REQUEST,
            "CONTENT_DISPOSITION header is mandatory",
            "For archive deposit, the CONTENT_DISPOSITION header must be sent.",
        )

    packaging = headers.packaging
    if packaging and packaging not in ACCEPT_PACKAGINGS:
        raise DepositError(
            BAD_REQUEST,
            f"Only packaging {ACCEPT_PACKAGINGS} is supported",
            f"The packaging provided {packaging} is not supported",
        )

    archive = request.FILES["file"]
    assert isinstance(archive, UploadedFile), archive

    self._check_file_length(archive, announced_length)
    self._check_file_md5sum(archive, headers.content_md5sum)

    # actual storage of data
    self._deposit_put(
        deposit=deposit, in_progress=headers.in_progress,
    )
    self._deposit_request_put(
        deposit,
        {ARCHIVE_KEY: archive},
        replace_metadata=replace_metadata,
        replace_archives=replace_archives,
    )

    return Receipt(
        deposit_id=deposit.id,
        deposit_date=deposit.reception_date,
        status=deposit.status,
        archive=archive.name,
    )
def _read_metadata(self, metadata_stream) -> Tuple[bytes, Dict[str, Any]]:
    """Read the whole metadata stream and parse it as xml.

    Returns:
        Tuple of (raw metadata as read, parsed metadata).

    """
    raw = metadata_stream.read()
    return raw, parse_xml(raw)
def _multipart_upload(
    self,
    request: Request,
    headers: ParsedRequestHeaders,
    collection_name: str,
    deposit: Deposit,
    replace_metadata: bool = False,
    replace_archives: bool = False,
) -> Receipt:
    """Multipart upload supported with exactly:
    - 1 archive (zip or tar)
    - 1 atom entry

    Args:
        request: the request holding information to parse
            and inject in db
        headers: parsed request headers
        collection_name: name of the collection deposited into
        deposit: deposit to be updated
        replace_metadata: if True, existing metadata requests of the
            deposit are replaced; if False (default) the new metadata is
            added to them.
        replace_archives: if True, existing archive requests of the
            deposit are replaced; if False (default) the new archive is
            added to them.

    Returns:
        The receipt for the deposit, naming the uploaded archive.

    Raises:
        DepositError:
        - ERROR_CONTENT if the parts are not exactly one archive and one
          atom entry
        - PARSING_ERROR if the atom entry is not well-formed xml
        - if the archive md5 hash mismatches the announced one or the
          archive exceeds the configured max size (see
          ``_check_file_md5sum`` and ``_check_file_length``)

    """
    content_types_present = set()

    data: Dict[str, Optional[Any]] = {
        "application/zip": None,  # expected either zip
        "application/x-tar": None,  # or x-tar
        "application/atom+xml": None,
    }
    for fh in request.FILES.values():
        content_type = fh.content_type
        if content_type in content_types_present:
            raise DepositError(
                ERROR_CONTENT,
                "Only 1 application/zip (or application/x-tar) archive "
                "and 1 atom+xml entry is supported (as per sword2.0 "
                "specification)",
                "You provided more than 1 application/(zip|x-tar) "
                "or more than 1 application/atom+xml content-disposition "
                "header in the multipart deposit",
            )

        content_types_present.add(content_type)
        assert content_type is not None
        data[content_type] = fh

    filehandler = data["application/zip"]
    if not filehandler:
        filehandler = data["application/x-tar"]
    atom_entry = data["application/atom+xml"]

    # Two parts are not enough: they must be one archive AND one atom entry.
    # A zip + tar pair, or an archive with a part of an unexpected content
    # type, would otherwise crash further down instead of being rejected.
    if len(content_types_present) != 2 or filehandler is None or atom_entry is None:
        raise DepositError(
            ERROR_CONTENT,
            "You must provide both 1 application/zip (or "
            "application/x-tar) and 1 atom+xml entry for multipart "
            "deposit",
            "You need to provide only 1 application/(zip|x-tar) "
            "and 1 application/atom+xml content-disposition header "
            "in the multipart deposit",
        )

    assert isinstance(filehandler, UploadedFile), filehandler

    self._check_file_length(filehandler)
    self._check_file_md5sum(filehandler, headers.content_md5sum)

    try:
        raw_metadata, metadata = self._read_metadata(atom_entry)
    except ParserError:
        raise DepositError(
            PARSING_ERROR,
            "Malformed xml metadata",
            "The xml received is malformed. "
            "Please ensure your metadata file is correctly formatted.",
        )

    # actual storage of data
    self._deposit_put(
        deposit=deposit, in_progress=headers.in_progress,
    )
    deposit_request_data = {
        ARCHIVE_KEY: filehandler,
        METADATA_KEY: metadata,
        RAW_METADATA_KEY: raw_metadata,
    }
    self._deposit_request_put(
        deposit, deposit_request_data, replace_metadata, replace_archives
    )

    return Receipt(
        deposit_id=deposit.id,
        deposit_date=deposit.reception_date,
        archive=filehandler.name,
        status=deposit.status,
    )
def _store_metadata_deposit(
    self,
    deposit: Deposit,
    swhid_reference: Union[str, SWHID],
    metadata: Dict,
    raw_metadata: bytes,
    deposit_origin: Optional[str] = None,
) -> Tuple[Union[SWHID, str], Union[SWHID, str], Deposit, DepositRequest]:
    """Attach raw metadata to a swhid or origin in the metadata storage.

    The parsed metadata must pass `check_metadata` first. The metadata is
    then recorded as a deposit request and written to the raw extrinsic
    metadata storage, together with its authority (the deposit client) and
    its fetcher (this tool).

    Args:
        deposit: Deposit reference
        swhid_reference: The swhid or the origin to attach metadata information to
        metadata: Full dict of metadata to check for validity (parsed out of
            raw_metadata)
        raw_metadata: The actual raw metadata to send in the storage metadata
        deposit_origin: Optional deposit origin url to use if any (e.g. deposit
            update scenario provides one)

    Raises:
        DepositError (BAD_REQUEST) when the metadata checks fail

    Returns:
        Tuple of core swhid, swhid context, deposit and deposit request

    """
    is_valid, failure_details = check_metadata(metadata)
    if not is_valid:
        assert failure_details, "Details should be set when a failure occurs"
        raise DepositError(
            BAD_REQUEST,
            "Functional metadata checks failure",
            convert_status_detail(failure_details),
        )

    authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=deposit.client.provider_url,
        metadata={"name": deposit.client.last_name},
    )
    fetcher = MetadataFetcher(
        name=self.tool["name"],
        version=self.tool["version"],
        metadata=self.tool["configuration"],
    )

    # record the metadata on the deposit itself
    deposit_request = self._deposit_request_put(
        deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}
    )

    object_type, context = compute_metadata_context(swhid_reference)
    if deposit_origin:  # metadata deposit update on completed deposit
        context["origin"] = deposit_origin

    # an origin url is used as is; a SWHID is stripped down to its core
    swhid_core: Union[str, SWHID] = (
        swhid_reference
        if isinstance(swhid_reference, str)
        else attr.evolve(swhid_reference, metadata={})
    )

    record = RawExtrinsicMetadata(
        type=object_type,
        target=swhid_core,  # core swhid or origin
        discovery_date=deposit_request.date,
        authority=authority,
        fetcher=fetcher,
        format="sword-v2-atom-codemeta",
        metadata=raw_metadata,
        **context,
    )

    # write to metadata storage: authority and fetcher before the record
    self.storage_metadata.metadata_authority_add([authority])
    self.storage_metadata.metadata_fetcher_add([fetcher])
    self.storage_metadata.raw_extrinsic_metadata_add([record])

    return (swhid_core, swhid_reference, deposit, deposit_request)
def _atom_entry(
    self,
    request: Request,
    headers: ParsedRequestHeaders,
    collection_name: str,
    deposit: Deposit,
    replace_metadata: bool = False,
    replace_archives: bool = False,
) -> Receipt:
    """Atom entry deposit.

    Handles both code deposits (metadata part) and metadata-only deposits
    (when the metadata carries a swh reference).

    Args:
        request: the request holding information to parse
            and inject in db
        headers: parsed request headers
        collection_name: name of the collection deposited into
        deposit: deposit to be updated
        replace_metadata: if True, existing metadata requests of the
            deposit are replaced; if False (default) the new metadata is
            added to them.
        replace_archives: if True, existing archive requests of the
            deposit are replaced; if False (default) they are kept.

    Returns:
        The receipt for the deposit (no archive).

    Raises:
        DepositError:
        - BAD_REQUEST if the body is empty or malformed xml
        - BAD_REQUEST if the origin-related elements are combined in an
          unsupported way, or the origin to add to has no loaded deposit
        - FORBIDDEN if the origin url is outside the client's provider url
        - PARSING_ERROR if the swh reference is not a valid SWHID

    """
    # NOTE(review): several error messages below start with a blank where an
    # element name seems to be missing; confirm against the upstream file.
    try:
        raw_metadata, metadata = self._read_metadata(request.data)
    except ParserError:
        raise DepositError(
            BAD_REQUEST,
            "Malformed xml metadata",
            "The xml received is malformed. "
            "Please ensure your metadata file is correctly formatted.",
        )

    if metadata is None:
        raise DepositError(
            BAD_REQUEST,
            "Empty body request is not supported",
            "Atom entry deposit is supposed to send for metadata. "
            "If the body is empty, there is no metadata.",
        )

    create_origin = metadata.get("swh:deposit", {}).get("swh:create_origin")
    add_to_origin = metadata.get("swh:deposit", {}).get("swh:add_to_origin")

    if create_origin and add_to_origin:
        raise DepositError(
            BAD_REQUEST,
            " and  are mutually exclusive, "
            "as they respectively create a new origin and add to an existing "
            "origin.",
        )

    if create_origin:
        origin_url = create_origin["swh:origin"]["@url"]
        check_client_origin(deposit.client, origin_url)
        deposit.origin_url = origin_url

    if add_to_origin:
        origin_url = add_to_origin["swh:origin"]["@url"]
        check_client_origin(deposit.client, origin_url)
        try:
            # most recent successfully loaded deposit on that origin
            deposit.parent = (
                Deposit.objects.filter(
                    client=deposit.client,
                    origin_url=origin_url,
                    status=DEPOSIT_STATUS_LOAD_SUCCESS,
                )
                .order_by("-id")[0:1]
                .get()
            )
        except Deposit.DoesNotExist:
            # nothing to add to: report it to the client instead of crashing
            raise DepositError(
                BAD_REQUEST,
                f"Cannot add to origin {origin_url}",
                f"No successfully loaded deposit from this client exists "
                f"for origin {origin_url}.",
            )
        # NOTE(review): deposit.origin_url is left unset on this path, so
        # _complete_deposit will guess one; confirm this is intended.

    if "atom:external_identifier" in metadata:
        # Deprecated tag.
        # When clients stopped using it, this should raise an error
        # unconditionally
        if deposit.origin_url:
            raise DepositError(
                BAD_REQUEST,
                " is deprecated, you should only use "
                " from now on.",
            )

        if deposit.parent:
            raise DepositError(
                BAD_REQUEST, " is deprecated.",
            )

        if headers.slug and metadata["atom:external_identifier"] != headers.slug:
            raise DepositError(
                BAD_REQUEST,
                "The 'external_identifier' tag is deprecated, "
                "the Slug header should be used instead.",
            )

    # Determine if we are in the metadata-only deposit case
    try:
        swhid = parse_swh_reference(metadata)
    except ValidationError as e:
        raise DepositError(
            PARSING_ERROR, "Invalid SWHID reference", str(e),
        )

    if swhid is not None and (
        deposit.origin_url or deposit.parent or deposit.external_id
    ):
        raise DepositError(
            BAD_REQUEST,
            " is for metadata-only deposits and "
            " /  / Slug are for "
            "code deposits, only one may be used on a given deposit.",
        )

    self._deposit_put(
        deposit=deposit, in_progress=headers.in_progress,
    )

    if swhid is not None:
        # metadata-only deposit: stored right away, nothing to load
        swhid, swhid_ref, depo, depo_request = self._store_metadata_deposit(
            deposit, swhid, metadata, raw_metadata
        )

        deposit.status = DEPOSIT_STATUS_LOAD_SUCCESS
        if isinstance(swhid_ref, SWHID):
            deposit.swhid = str(swhid)
            deposit.swhid_context = str(swhid_ref)
        deposit.complete_date = depo_request.date
        deposit.reception_date = depo_request.date
        deposit.save()

        return Receipt(
            deposit_id=deposit.id,
            deposit_date=depo_request.date,
            status=deposit.status,
            archive=None,
        )

    self._deposit_request_put(
        deposit,
        {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata},
        replace_metadata,
        replace_archives,
    )

    return Receipt(
        deposit_id=deposit.id,
        deposit_date=deposit.reception_date,
        status=deposit.status,
        archive=None,
    )
def _empty_post(
    self,
    request: Request,
    headers: ParsedRequestHeaders,
    collection_name: str,
    deposit: Deposit,
) -> Receipt:
    """Finalize a deposit on an empty POST.

    Args:
        request: the request (unused here)
        headers: parsed request headers (unused here)
        collection_name: name of the collection (unused here)
        deposit: deposit to be finalized

    Returns:
        The receipt, dated with the deposit's completion date.

    """
    self._complete_deposit(deposit)

    completed_on = deposit.complete_date
    assert completed_on is not None

    return Receipt(
        deposit_id=deposit.id,
        deposit_date=completed_on,
        status=deposit.status,
        archive=None,
    )
def additional_checks(
    self,
    request: Request,
    headers: ParsedRequestHeaders,
    collection_name: str,
    deposit: Optional[Deposit],
) -> Dict[str, Any]:
    """Hook for child classes to add their own checks.

    This default implementation checks nothing.

    Returns:
        An empty dict.

    """
    no_problem: Dict[str, Any] = {}
    return no_problem
def get_client(self, request) -> DepositClient:
"""Return the DepositClient matching the authenticated user.

The client is looked up once and then kept on `self._client`.

Raises:
DepositError (NOT_FOUND) if no client has that username
"""
# This class depends on AuthenticatedAPIView, so request.user.username
# is always set
username = request.user.username
assert username is not None
if self._client is None:
try:
self._client = DepositClient.objects.get(  # type: ignore
username=username
)
except DepositClient.DoesNotExist:
raise DepositError(NOT_FOUND, f"Unknown client name {username}")
# the kept client must be the one of the current request
assert self._client.username == username
return self._client
def checks(
self, request: Request, collection_name: str, deposit: Optional[Deposit] = None
) -> ParsedRequestHeaders:
"""Run the checks shared by all endpoints and return the parsed headers.

Checks that the client may access the collection, that the deposit (if
any) may be acted upon (see `restrict_access`), that no mediation is
requested, then runs `additional_checks`.

Raises:
DepositError: FORBIDDEN when the client cannot access the collection,
MEDIATION_NOT_ALLOWED when an On-Behalf-Of header is sent, or
whatever the called helpers raise
"""
if deposit is None:
collection = get_collection_by_name(collection_name)
else:
assert collection_name == deposit.collection.name
collection = deposit.collection
client = self.get_client(request)
collection_id = collection.id
# presumably a list of collection ids; verify against the DepositClient model
collections = client.collections
assert collections is not None
if collection_id not in collections:
raise DepositError(
FORBIDDEN,
f"Client {client.username} cannot access collection {collection_name}",
)
headers = self._read_headers(request)
if deposit is not None:
self.restrict_access(request, headers, deposit)
if headers.on_behalf_of:
raise DepositError(MEDIATION_NOT_ALLOWED, "Mediation is not supported.")
self.additional_checks(request, headers, collection_name, deposit)
return headers
def restrict_access(
    self, request: Request, headers: ParsedRequestHeaders, deposit: Deposit
) -> None:
    """Reject any non-GET request on a deposit whose status is not 'partial'.

    Raises:
        DepositError (BAD_REQUEST) when the request is rejected

    """
    if request.method == "GET" or deposit.status == DEPOSIT_STATUS_PARTIAL:
        return

    raise DepositError(
        BAD_REQUEST,
        summary=(
            f"You can only act on deposit with status '{DEPOSIT_STATUS_PARTIAL}'"
        ),
        verbose_description=f"This deposit has status '{deposit.status}'",
    )
def _basic_not_allowed_method(self, request: Request, method: str):
    """Always raise `DepositError` (METHOD_NOT_ALLOWED) naming the method."""
    message = f"{method} method is not supported on this endpoint"
    raise DepositError(METHOD_NOT_ALLOWED, message)
def get(
self, request: Request, collection_name: str, deposit_id: int
) -> Union[HttpResponse, FileResponse]:
"""Default GET handler: not supported unless a subclass overrides it."""
return self._basic_not_allowed_method(request, "GET")
def post(
self, request: Request, collection_name: str, deposit_id: Optional[int] = None
) -> HttpResponse:
"""Default POST handler: not supported unless a subclass overrides it."""
return self._basic_not_allowed_method(request, "POST")
def put(
self, request: Request, collection_name: str, deposit_id: int
) -> HttpResponse:
"""Default PUT handler: not supported unless a subclass overrides it."""
return self._basic_not_allowed_method(request, "PUT")
def delete(
self, request: Request, collection_name: str, deposit_id: Optional[int] = None
) -> HttpResponse:
"""Default DELETE handler: not supported unless a subclass overrides it."""
return self._basic_not_allowed_method(request, "DELETE")
class APIGet(APIBase, metaclass=ABCMeta):
    """Mixin for class to support GET method.

    """

    def get(
        self, request: Request, collection_name: str, deposit_id: int
    ) -> Union[HttpResponse, FileResponse]:
        """Endpoint to read a resource of a deposit.

        The deposit is fetched, the common checks are run, then the
        response is built from what `process_get` returns.

        Returns:
            A FileResponse (served as application/zip) when `process_get`
            reports the "swh/generator" content type, otherwise an
            HttpResponse (json-encoded for "application/json").

        Raises:
            DepositError if the deposit or collection does not exist, or
            the checks fail

        """
        deposit = get_deposit_by_id(deposit_id, collection_name)
        self.checks(request, collection_name, deposit)

        status_code, content, content_type = self.process_get(
            request, collection_name, deposit
        )

        if content_type == "swh/generator":
            # content is a context manager yielding the path of the file to serve
            with content as path:
                return FileResponse(
                    open(path, "rb"),
                    status=status_code,
                    content_type="application/zip",
                )

        body = json.dumps(content) if content_type == "application/json" else content
        return HttpResponse(body, status=status_code, content_type=content_type)

    @abstractmethod
    def process_get(
        self, request: Request, collection_name: str, deposit: Deposit
    ) -> Tuple[int, Any, str]:
        """Routine to deal with the deposit's get processing.

        Returns:
            Tuple status, stream of content, content-type

        """
        ...
class APIPost(APIBase, metaclass=ABCMeta):
    """Mixin adding POST support to an endpoint.

    Subclasses implement process_post; post() turns its result into a SWORD
    deposit receipt response.

    """

    def post(
        self, request: Request, collection_name: str, deposit_id: Optional[int] = None
    ) -> HttpResponse:
        """Endpoint to create/add resources to deposit.

        Args:
            request: the incoming request
            collection_name: name of the targeted collection
            deposit_id: id of an existing deposit, or None when the request
                does not target one

        Returns:
            The deposit receipt response, carrying the status code chosen by
            process_post. Error responses (unknown deposit or collection,
            deposit outside the collection) are presumably raised by the
            lookup and the checks, which are not visible here.

        """
        if deposit_id is None:
            deposit = None
        else:
            deposit = get_deposit_by_id(deposit_id, collection_name)

        headers = self.checks(request, collection_name, deposit)
        status_code, iri_key, receipt = self.process_post(
            request, headers, collection_name, deposit
        )

        return self._make_deposit_receipt(
            request, collection_name, status_code, iri_key, receipt,
        )

    def _make_deposit_receipt(
        self,
        request,
        collection_name: str,
        status: int,
        iri_key: str,
        receipt: Receipt,
    ) -> HttpResponse:
        """Returns an HttpResponse with a SWORD Deposit receipt as content.

        Args:
            request: the incoming request, used to build absolute URIs
            collection_name: name of the collection the deposit belongs to
            status: status code of the response
            iri_key: which of the built IRIs goes in the Location header
            receipt: receipt data injected in the template

        """
        # Build the IRIs in the receipt
        args = [collection_name, receipt.deposit_id]
        iris = {
            iri: request.build_absolute_uri(reverse(iri, args=args))
            for iri in [EM_IRI, EDIT_IRI, CONT_FILE_IRI, SE_IRI, STATE_IRI]
        }

        context = {
            **attr.asdict(receipt),
            **iris,
            "packagings": ACCEPT_PACKAGINGS,
        }

        response = render(
            request,
            "deposit/deposit_receipt.xml",
            context=context,
            content_type="application/xml",
            status=status,
        )
        # Set the header through the public item-assignment API instead of
        # writing into the private ``_headers`` mapping of the response.
        response["Location"] = iris[iri_key]
        return response

    @abstractmethod
    def process_post(
        self,
        request,
        headers: ParsedRequestHeaders,
        collection_name: str,
        deposit: Optional[Deposit] = None,
    ) -> Tuple[int, str, Receipt]:
        """Routine to deal with the deposit's processing.

        Returns
            Tuple of:
            - response status code (200, 201, etc...)
            - key iri (EM_IRI, EDIT_IRI, etc...)
            - Receipt

        """
class APIPut(APIBase, metaclass=ABCMeta):
    """Mixin adding PUT support to an endpoint.

    Subclasses implement process_put; put() answers with an empty 204.

    """

    def put(
        self, request: Request, collection_name: str, deposit_id: int
    ) -> HttpResponse:
        """Endpoint to update deposit resources.

        Returns:
            An empty 204 response once process_put has returned. Error
            responses (unknown deposit or collection, deposit outside the
            collection) are presumably raised by the lookup and the checks,
            which are not visible here.

        """
        # A None id is tolerated even though the annotation says int.
        deposit = (
            None
            if deposit_id is None
            else get_deposit_by_id(deposit_id, collection_name)
        )
        parsed_headers = self.checks(request, collection_name, deposit)
        self.process_put(request, parsed_headers, collection_name, deposit)

        return HttpResponse(status=status.HTTP_204_NO_CONTENT)

    @abstractmethod
    def process_put(
        self,
        request: Request,
        headers: ParsedRequestHeaders,
        collection_name: str,
        deposit: Deposit,
    ) -> None:
        """Routine to deal with updating a deposit in some way.

        Nothing is returned; put() ignores the result.

        """
class APIDelete(APIBase, metaclass=ABCMeta):
    """Mixin adding DELETE support to an endpoint.

    Subclasses implement process_delete; delete() answers with an empty 204.

    """

    def delete(
        self, request: Request, collection_name: str, deposit_id: Optional[int] = None
    ) -> HttpResponse:
        """Endpoint to delete some deposit's resources (archives, deposit).

        Returns:
            An empty 204 response once process_delete has returned. Error
            responses (unknown deposit or collection, deposit outside the
            collection) are presumably raised by the lookup and the checks,
            which are not visible here.

        """
        # The signature allows None, but a deposit id is required here.
        assert deposit_id is not None
        target = get_deposit_by_id(deposit_id, collection_name)
        self.checks(request, collection_name, target)
        self.process_delete(request, collection_name, target)

        no_content = HttpResponse(status=status.HTTP_204_NO_CONTENT)
        return no_content

    @abstractmethod
    def process_delete(
        self, request: Request, collection_name: str, deposit: Deposit
    ) -> None:
        """Routine to delete a resource.

        This is mostly not allowed except for the
        EM_IRI (cf. .api.deposit_update.APIUpdateArchive)

        """
diff --git a/swh/deposit/tests/api/test_collection.py b/swh/deposit/tests/api/test_collection.py
index e6842523..5ab2cc86 100644
--- a/swh/deposit/tests/api/test_collection.py
+++ b/swh/deposit/tests/api/test_collection.py
@@ -1,303 +1,399 @@
# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import hashlib
from io import BytesIO
from django.urls import reverse
from rest_framework import status
from swh.deposit.config import (
COL_IRI,
DEPOSIT_STATUS_LOAD_FAILURE,
DEPOSIT_STATUS_LOAD_SUCCESS,
DEPOSIT_STATUS_PARTIAL,
DEPOSIT_STATUS_REJECTED,
SE_IRI,
)
from swh.deposit.models import Deposit
from swh.deposit.parsers import parse_xml
from ..conftest import create_deposit
def test_deposit_post_will_fail_with_401(client):
    """A POST on the collection IRI with the plain ``client`` fixture gets a 401."""
    collection_url = reverse(COL_IRI, args=["hal"])

    answer = client.post(collection_url)

    assert answer.status_code == status.HTTP_401_UNAUTHORIZED
def test_access_to_another_user_collection_is_forbidden(
    authenticated_client, deposit_another_collection, deposit_user
):
    """Posting to the other collection is answered with a 403 naming both the
    client and the collection.
    """
    other_collection = deposit_another_collection

    response = authenticated_client.post(
        reverse(COL_IRI, args=[other_collection.name])
    )

    assert response.status_code == status.HTTP_403_FORBIDDEN
    expected = (
        f"Client {deposit_user.username} "
        f"cannot access collection {other_collection.name}"
    )
    assert expected in response.content.decode("utf-8")
def test_delete_on_col_iri_not_supported(authenticated_client, deposit_collection):
    """A DELETE on the collection IRI is answered with a 405 and an explicit
    message.
    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    response = authenticated_client.delete(collection_url)

    assert response.status_code == status.HTTP_405_METHOD_NOT_ALLOWED
    body = response.content.decode("utf-8")
    assert "DELETE method is not supported on this endpoint" in body
def create_deposit_with_rejection_status(authenticated_client, deposit_collection):
    """Post a payload announced as a zip (but which is not one) and check the
    receipt reports the rejected status.
    """
    payload = b"some data which is clearly not a zip file"
    # headers other than CONTENT_LENGTH need the HTTP_ prefix to be taken
    # into account
    headers = {
        "CONTENT_LENGTH": len(payload),
        "HTTP_SLUG": "some-external-id-1",
        "HTTP_CONTENT_MD5": hashlib.md5(payload).hexdigest(),
        "HTTP_PACKAGING": "http://purl.org/net/sword/package/SimpleZip",
        "HTTP_CONTENT_DISPOSITION": "attachment; filename=filename0",
    }

    # when
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/zip",  # as zip
        data=payload,
        **headers,
    )

    assert response.status_code == status.HTTP_201_CREATED
    receipt = parse_xml(BytesIO(response.content))
    assert receipt["deposit_status"] == DEPOSIT_STATUS_REJECTED
def test_act_on_deposit_rejected_is_not_permitted(
    authenticated_client, deposit_collection, rejected_deposit, atom_dataset
):
    """Posting on the SE-IRI of a rejected deposit is answered with a 400
    stating that only partial deposits can be acted upon.
    """
    se_url = reverse(
        SE_IRI, args=[rejected_deposit.collection.name, rejected_deposit.id]
    )

    response = authenticated_client.post(
        se_url,
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data1"],
        HTTP_SLUG=rejected_deposit.external_id,
    )

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    expected = f"You can only act on deposit with status '{DEPOSIT_STATUS_PARTIAL}'"
    assert expected in response.content.decode("utf-8")
def test_add_deposit_when_partial_makes_new_deposit(
    authenticated_client,
    deposit_collection,
    partial_deposit,
    atom_dataset,
    deposit_user,
):
    """Posting deposit on collection when previous is partial makes new deposit

    The new deposit is a distinct one and has no parent.

    """
    deposit = partial_deposit
    assert deposit.status == DEPOSIT_STATUS_PARTIAL
    origin_url = deposit_user.provider_url + deposit.external_id

    # adding a new deposit with the same external id
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data0"] % origin_url,
        HTTP_SLUG=deposit.external_id,
    )

    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
    response_content = parse_xml(BytesIO(response.content))
    # The id is read back from the XML receipt; convert it so the comparison
    # with the integer primary key below is not trivially true (assumes the
    # parser yields text; int() is harmless otherwise).
    deposit_id = int(response_content["swh:deposit_id"])

    assert deposit_id != deposit.id  # new deposit

    new_deposit = Deposit.objects.get(pk=deposit_id)
    assert new_deposit != deposit
    assert new_deposit.parent is None
def test_add_deposit_when_failed_makes_new_deposit_with_no_parent(
    authenticated_client, deposit_collection, failed_deposit, atom_dataset, deposit_user
):
    """Posting deposit on collection when the previous one failed to load
    makes a new deposit without parent

    """
    deposit = failed_deposit
    assert deposit.status == DEPOSIT_STATUS_LOAD_FAILURE
    origin_url = deposit_user.provider_url + deposit.external_id

    # adding a new deposit with the same external id as a failed deposit
    # does not create a parenting chain
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data0"] % origin_url,
        HTTP_SLUG=deposit.external_id,
    )

    assert response.status_code == status.HTTP_201_CREATED
    response_content = parse_xml(BytesIO(response.content))
    # The id is read back from the XML receipt; convert it so the comparison
    # with the integer primary key below is not trivially true (assumes the
    # parser yields text; int() is harmless otherwise).
    deposit_id = int(response_content["swh:deposit_id"])

    assert deposit_id != deposit.id

    new_deposit = Deposit.objects.get(pk=deposit_id)
    assert new_deposit != deposit
    assert new_deposit.parent is None
def test_add_deposit_when_done_makes_new_deposit_with_parent_old_one(
    authenticated_client,
    deposit_collection,
    completed_deposit,
    atom_dataset,
    deposit_user,
):
    """Posting deposit on collection when deposit done makes new deposit with
    parent

    """
    # given multiple deposit already loaded
    deposit = completed_deposit
    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
    origin_url = deposit_user.provider_url + deposit.external_id

    # adding a new deposit with the same external id as a completed deposit
    # creates the parenting chain
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data0"] % origin_url,
        HTTP_SLUG=deposit.external_id,
    )

    assert response.status_code == status.HTTP_201_CREATED
    response_content = parse_xml(BytesIO(response.content))
    # The id is read back from the XML receipt; convert it so the comparison
    # with the integer primary key below is not trivially true (assumes the
    # parser yields text; int() is harmless otherwise).
    deposit_id = int(response_content["swh:deposit_id"])

    assert deposit_id != deposit.id

    new_deposit = Deposit.objects.get(pk=deposit_id)
    assert deposit.collection == new_deposit.collection
    assert deposit.origin_url == origin_url

    assert new_deposit != deposit
    assert new_deposit.parent == deposit
def test_add_deposit_with_add_to_origin(
    authenticated_client,
    deposit_collection,
    completed_deposit,
    atom_dataset,
    deposit_user,
):
    """Posting the "entry-data-with-add-to-origin" entry creates a new deposit
    whose parent is the completed deposit

    NOTE(review): the original summary had lost a word, presumably the
    <swh:add_to_origin> tag name; confirm.

    """
    # given multiple deposit already loaded
    deposit = completed_deposit
    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
    origin_url = deposit_user.provider_url + deposit.external_id

    # adding a new deposit targeting the origin url of the completed deposit
    # (no slug header is sent) creates the parenting chain
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data-with-add-to-origin"] % origin_url,
    )

    assert response.status_code == status.HTTP_201_CREATED
    response_content = parse_xml(BytesIO(response.content))
    # The id is read back from the XML receipt; convert it so the comparison
    # with the integer primary key below is not trivially true (assumes the
    # parser yields text; int() is harmless otherwise).
    deposit_id = int(response_content["swh:deposit_id"])

    assert deposit_id != deposit.id

    new_deposit = Deposit.objects.get(pk=deposit_id)
    assert deposit.collection == new_deposit.collection
    assert deposit.origin_url == origin_url

    assert new_deposit != deposit
    assert new_deposit.parent == deposit
+
+
def test_add_deposit_external_id_conflict_no_parent(
    authenticated_client,
    another_authenticated_client,
    deposit_collection,
    deposit_another_collection,
    atom_dataset,
    sample_archive,
    deposit_user,
):
    """Posting a deposit with an external_id conflicting with an external_id
    of a different client does not create a parent relationship

    """
    external_id = "foobar"
    origin_url = deposit_user.provider_url + external_id

    # create a deposit for that other user, with the same slug
    other_deposit = create_deposit(
        another_authenticated_client,
        deposit_another_collection.name,
        sample_archive,
        external_id,
        DEPOSIT_STATUS_LOAD_SUCCESS,
    )

    # adding a new deposit with the same external id as a completed deposit
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data0"] % origin_url,
        HTTP_SLUG=external_id,
    )

    assert response.status_code == status.HTTP_201_CREATED
    response_content = parse_xml(BytesIO(response.content))
    # The id is read back from the XML receipt; convert it so the comparison
    # with the integer primary key below is not trivially true (assumes the
    # parser yields text; int() is harmless otherwise).
    deposit_id = int(response_content["swh:deposit_id"])

    assert other_deposit.id != deposit_id

    new_deposit = Deposit.objects.get(pk=deposit_id)

    assert new_deposit.parent is None
def test_add_deposit_external_id_conflict_with_parent(
    authenticated_client,
    another_authenticated_client,
    deposit_collection,
    deposit_another_collection,
    completed_deposit,
    atom_dataset,
    sample_archive,
    deposit_user,
):
    """Posting a deposit with an external_id conflicting with an external_id
    of a different client creates a parent relationship with the deposit
    of the right client instead of the last matching deposit

    This test does not have an equivalent for origin url conflicts, as these
    can not happen (assuming clients do not have provider_url overlaps)

    """
    # given multiple deposit already loaded
    deposit = completed_deposit
    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
    origin_url = deposit_user.provider_url + deposit.external_id

    # create a deposit for that other user, with the same slug
    other_deposit = create_deposit(
        another_authenticated_client,
        deposit_another_collection.name,
        sample_archive,
        deposit.external_id,
        DEPOSIT_STATUS_LOAD_SUCCESS,
    )

    # adding a new deposit with the same external id as a completed deposit
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data0"] % origin_url,
        HTTP_SLUG=deposit.external_id,
    )

    assert response.status_code == status.HTTP_201_CREATED
    response_content = parse_xml(BytesIO(response.content))
    # The id is read back from the XML receipt; convert it so the comparison
    # with the integer primary key below is not trivially true (assumes the
    # parser yields text; int() is harmless otherwise).
    deposit_id = int(response_content["swh:deposit_id"])

    assert deposit_id != deposit.id
    assert other_deposit.id != deposit.id

    new_deposit = Deposit.objects.get(pk=deposit_id)
    assert deposit.collection == new_deposit.collection
    assert deposit.external_id == new_deposit.external_id

    assert new_deposit != deposit
    assert new_deposit.parent == deposit
+
+
def test_add_deposit_add_to_origin_conflict(
    authenticated_client,
    another_authenticated_client,
    deposit_collection,
    deposit_another_collection,
    atom_dataset,
    sample_archive,
    deposit_user,
    deposit_another_user,
):
    """Posting a deposit referencing an origin under another client's
    provider url is answered with a 403

    NOTE(review): the original summary had lost a word, presumably the
    <swh:add_to_origin> tag name, yet the payload posted here is
    "entry-data0"; confirm which entry was intended.

    """
    slug = "foobar"
    foreign_origin_url = deposit_another_user.provider_url + slug

    # create a deposit for that other user, with the same slug
    create_deposit(
        another_authenticated_client,
        deposit_another_collection.name,
        sample_archive,
        slug,
        DEPOSIT_STATUS_LOAD_SUCCESS,
    )

    # post, as the first client, an entry pointing at the other client's origin
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    response = authenticated_client.post(
        collection_url,
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data0"] % foreign_origin_url,
    )

    assert response.status_code == status.HTTP_403_FORBIDDEN
    assert b"must start with" in response.content
+
+
def test_add_deposit_add_to_wrong_origin(
    authenticated_client, deposit_collection, atom_dataset, sample_archive,
):
    """Posting a deposit referencing the origin "http://example.org/foo"
    is answered with a 403 mentioning the required url prefix

    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    payload = atom_dataset["entry-data0"] % "http://example.org/foo"

    response = authenticated_client.post(
        collection_url,
        content_type="application/atom+xml;type=entry",
        data=payload,
    )

    assert response.status_code == status.HTTP_403_FORBIDDEN
    assert b"must start with" in response.content
diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py
index 3fbf3ba7..e887fd79 100644
--- a/swh/deposit/tests/api/test_collection_post_atom.py
+++ b/swh/deposit/tests/api/test_collection_post_atom.py
@@ -1,436 +1,503 @@
# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Tests the handling of the Atom content when doing a POST Col-IRI."""
from io import BytesIO
import uuid
from django.urls import reverse
import pytest
from rest_framework import status
from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED
from swh.deposit.models import Deposit, DepositCollection, DepositRequest
from swh.deposit.parsers import parse_xml
def test_post_deposit_atom_201_even_with_decimal(
    authenticated_client, deposit_collection, atom_dataset
):
    """Posting the "error-with-decimal" entry returns a 201 and stores the
    software version as the string "10.4"

    """
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["error-with-decimal"],
        HTTP_SLUG="external-id",
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()

    receipt = parse_xml(BytesIO(response.content))
    deposit = Deposit.objects.get(pk=receipt["swh:deposit_id"])
    deposit_request = DepositRequest.objects.get(deposit=deposit)

    metadata = deposit_request.metadata
    assert metadata is not None
    assert metadata.get("codemeta:softwareVersion") == "10.4"
def test_post_deposit_atom_400_with_empty_body(
    authenticated_client, deposit_collection, atom_dataset
):
    """Posting the "entry-data-empty-body" entry returns a 400 with an
    explicit message

    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    response = authenticated_client.post(
        collection_url,
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data-empty-body"],
        HTTP_SLUG="external-id",
    )

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Empty body request is not supported" in response.content
def test_post_deposit_atom_400_badly_formatted_atom(
    authenticated_client, deposit_collection, atom_dataset
):
    """Posting the "entry-data-badly-formatted" entry returns a 400 reporting
    malformed xml

    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    payload = atom_dataset["entry-data-badly-formatted"]

    response = authenticated_client.post(
        collection_url,
        content_type="application/atom+xml;type=entry",
        data=payload,
        HTTP_SLUG="external-id",
    )

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Malformed xml metadata" in response.content
def test_post_deposit_atom_parsing_error(
    authenticated_client, deposit_collection, atom_dataset
):
    """Posting the "entry-data-parsing-error-prone" entry returns a 400
    reporting malformed xml

    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    payload = atom_dataset["entry-data-parsing-error-prone"]

    response = authenticated_client.post(
        collection_url,
        content_type="application/atom+xml;type=entry",
        data=payload,
        HTTP_SLUG="external-id",
    )

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Malformed xml metadata" in response.content
-def test_post_deposit_atom_403_wrong_origin_url_prefix(
def test_post_deposit_atom_400_both_create_origin_and_add_to_origin(
    authenticated_client, deposit_collection, atom_dataset
):
    """Posting an entry holding both <swh:create_origin> and
    <swh:add_to_origin> returns a 400 stating they are mutually exclusive

    """
    payload = atom_dataset["entry-data-with-both-create-origin-and-add-to-origin"]

    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=payload,
    )

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    expected = b"<swh:create_origin> and <swh:add_to_origin> are mutually exclusive"
    assert expected in response.content
+
+
def test_add_deposit_with_add_to_origin_and_external_identifier(
    authenticated_client,
    deposit_collection,
    completed_deposit,
    atom_dataset,
    deposit_user,
):
    """Posting the "entry-data-with-both-add-to-origin-and-external-id" entry
    returns a 400 stating that <external_identifier> is deprecated

    """
    # origin url of the already completed deposit
    origin_url = deposit_user.provider_url + completed_deposit.external_id
    template = atom_dataset["entry-data-with-both-add-to-origin-and-external-id"]

    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=template % origin_url,
    )

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"<external_identifier> is deprecated." in response.content
+
+
def test_post_deposit_atom_403_create_wrong_origin_url_prefix(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """Creating an origin for a prefix not owned by the client is forbidden

    """
    origin_url = "http://example.org/foo"
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    response = authenticated_client.post(
        collection_url,
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data0"] % origin_url,
        HTTP_IN_PROGRESS="true",
    )

    assert response.status_code == status.HTTP_403_FORBIDDEN
    body = response.content.decode()
    expected_msg = (
        f"Cannot create origin {origin_url}, "
        f"it must start with {deposit_user.provider_url}"
    )
    assert expected_msg in body
def test_post_deposit_atom_403_add_to_wrong_origin_url_prefix(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """Posting the "entry-data-with-add-to-origin" entry for a prefix not
    owned by the client is forbidden

    """
    origin_url = "http://example.org/foo"
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    response = authenticated_client.post(
        collection_url,
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data-with-add-to-origin"] % origin_url,
        HTTP_IN_PROGRESS="true",
    )

    assert response.status_code == status.HTTP_403_FORBIDDEN
    body = response.content.decode()
    expected_msg = (
        f"Cannot create origin {origin_url}, "
        f"it must start with {deposit_user.provider_url}"
    )
    assert expected_msg in body
+
+
def test_post_deposit_atom_use_slug_header(
    authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker
):
    """Posting an atom entry with a slug header but no origin url generates
    an origin url from the slug

    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    slug = str(uuid.uuid4())

    # when
    response = authenticated_client.post(
        collection_url,
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data-no-origin-url"],
        HTTP_IN_PROGRESS="false",
        HTTP_SLUG=slug,
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED
    receipt = parse_xml(BytesIO(response.content))
    deposit = Deposit.objects.get(pk=receipt["swh:deposit_id"])

    assert deposit.collection == deposit_collection
    assert deposit.origin_url == deposit_user.provider_url + slug
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
def test_post_deposit_atom_no_origin_url_nor_slug_header(
    authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker
):
    """Posting an atom entry without an origin url or a slug header should generate one

    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    # draw the value first, then make uuid.uuid4 return it
    slug = str(uuid.uuid4())
    mocker.patch("uuid.uuid4", return_value=slug)

    # when (no slug header is sent)
    response = authenticated_client.post(
        collection_url,
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data-no-origin-url"],
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED
    receipt = parse_xml(BytesIO(response.content))
    deposit = Deposit.objects.get(pk=receipt["swh:deposit_id"])

    assert deposit.collection == deposit_collection
    assert deposit.origin_url == deposit_user.provider_url + slug
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
-def test_post_deposit_atom_with_external_identifier(
def test_post_deposit_atom_with_mismatched_slug_and_external_identifier(
    authenticated_client, deposit_collection, atom_dataset
):
    """Posting an atom entry with mismatched slug header and external_identifier
    should return a 400

    """
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])
    payload = atom_dataset["error-with-external-identifier"] % "foobar"

    # when (the slug header differs from the identifier in the entry)
    response = authenticated_client.post(
        collection_url,
        content_type="application/atom+xml;type=entry",
        data=payload,
        HTTP_IN_PROGRESS="false",
        HTTP_SLUG="something",
    )

    # then
    assert b"The 'external_identifier' tag is deprecated" in response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_post_deposit_atom_with_create_origin_and_external_identifier(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """Posting the "error-with-external-identifier-and-create-origin" entry
    returns a 400 stating that <external_identifier> is deprecated

    NOTE(review): the original docstring had lost its tag names, presumably
    <external_identifier> and <swh:create_origin>; confirm.

    """
    slug = "foobar"
    template = atom_dataset["error-with-external-identifier-and-create-origin"]
    payload = template.format(
        external_id=slug, url=deposit_user.provider_url + slug,
    )

    # when
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=payload,
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert b"<external_identifier> is deprecated" in response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_post_deposit_atom_with_create_origin_and_reference(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """Posting the "error-with-reference-and-create-origin" entry returns a
    400 stating that only one of the two may be used

    NOTE(review): the original docstring had lost its tag names, presumably
    <swh:reference> and <swh:create_origin>; confirm.

    """
    slug = "foobar"
    template = atom_dataset["error-with-reference-and-create-origin"]
    payload = template.format(
        external_id=slug, url=deposit_user.provider_url + slug,
    )

    # when
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=payload,
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert b"only one may be used on a given deposit" in response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_post_deposit_atom_unknown_collection(authenticated_client, atom_dataset):
    """Posting an atom entry to an unknown collection should return a 404

    """
    missing_name = "unknown-one"

    # precondition: no such collection in the database
    with pytest.raises(DepositCollection.DoesNotExist):
        DepositCollection.objects.get(name=missing_name)

    response = authenticated_client.post(
        reverse(COL_IRI, args=[missing_name]),
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data0"],
        HTTP_SLUG="something",
    )

    assert response.status_code == status.HTTP_404_NOT_FOUND
    assert b"Unknown collection" in response.content
def test_post_deposit_atom_entry_initial(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """Posting an initial atom entry should return 201 with deposit receipt

    The stored deposit carries the origin url and a single metadata-only
    request.

    """
    # given: no deposit exists yet for that origin url
    origin_url = deposit_user.provider_url + "1225c695-cfb8-4ebb-aaaa-80da344efa6a"
    with pytest.raises(Deposit.DoesNotExist):
        Deposit.objects.get(origin_url=origin_url)

    raw_entry = atom_dataset["entry-data0"] % origin_url

    # when
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=raw_entry,
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()

    receipt = parse_xml(BytesIO(response.content))
    deposit = Deposit.objects.get(pk=receipt["swh:deposit_id"])
    assert deposit.collection == deposit_collection
    assert deposit.origin_url == origin_url
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED

    # one associated request to a deposit
    request_row = DepositRequest.objects.get(deposit=deposit)
    assert request_row.metadata is not None
    assert request_row.raw_metadata == raw_entry
    assert not request_row.archive
def test_post_deposit_atom_entry_with_codemeta(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """Posting the "codemeta-sample" entry should return 201 with deposit receipt

    The stored deposit carries the origin url and a single metadata-only
    request.

    """
    # given: no deposit exists yet for that origin url
    origin_url = deposit_user.provider_url + "1225c695-cfb8-4ebb-aaaa-80da344efa6a"
    with pytest.raises(Deposit.DoesNotExist):
        Deposit.objects.get(origin_url=origin_url)

    raw_entry = atom_dataset["codemeta-sample"] % origin_url

    # when
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=raw_entry,
        HTTP_IN_PROGRESS="false",
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED

    receipt = parse_xml(BytesIO(response.content))
    deposit = Deposit.objects.get(pk=receipt["swh:deposit_id"])
    assert deposit.collection == deposit_collection
    assert deposit.origin_url == origin_url
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED

    # one associated request to a deposit
    request_row = DepositRequest.objects.get(deposit=deposit)
    assert request_row.metadata is not None
    assert request_row.raw_metadata == raw_entry
    assert not request_row.archive
def test_post_deposit_atom_entry_multiple_steps(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """After an initial partial deposit, posting on its SE-IRI should return
    a 201 and complete that same deposit

    The deposit then holds two requests, in posting order.

    """
    # given
    origin_url = deposit_user.provider_url + "2225c695-cfb8-4ebb-aaaa-80da344efa6a"

    with pytest.raises(Deposit.DoesNotExist):
        Deposit.objects.get(origin_url=origin_url)

    # when
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data1"],
        HTTP_IN_PROGRESS="True",
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED
    response_content = parse_xml(BytesIO(response.content))
    deposit_id = int(response_content["swh:deposit_id"])

    deposit = Deposit.objects.get(pk=deposit_id)
    assert deposit.collection == deposit_collection
    assert deposit.origin_url is None  # not provided yet
    assert deposit.status == "partial"

    # one associated request to a deposit
    deposit_requests = DepositRequest.objects.filter(deposit=deposit)
    assert len(deposit_requests) == 1

    atom_entry_data = atom_dataset["entry-only-create-origin"] % (origin_url)

    # Pick the SE-IRI out of the links of the receipt. The failure message
    # reports the very same links that were searched.
    links = response_content["atom:link"]
    se_iri = next(
        (
            link["@href"]
            for link in links
            if link["@rel"] == "http://purl.org/net/sword/terms/add"
        ),
        None,
    )
    assert se_iri is not None, f"missing SE-IRI from {links}"

    # when updating the first deposit post
    response = authenticated_client.post(
        se_iri,
        content_type="application/atom+xml;type=entry",
        data=atom_entry_data,
        HTTP_IN_PROGRESS="False",
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
    response_content = parse_xml(BytesIO(response.content))
    deposit_id = int(response_content["swh:deposit_id"])

    deposit = Deposit.objects.get(pk=deposit_id)
    assert deposit.collection == deposit_collection
    assert deposit.origin_url == origin_url
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED

    assert len(Deposit.objects.all()) == 1

    # now 2 associated requests to a same deposit
    deposit_requests = DepositRequest.objects.filter(deposit=deposit).order_by("id")
    assert len(deposit_requests) == 2

    atom_entry_data1 = atom_dataset["entry-data1"]
    expected_meta = [
        {"metadata": parse_xml(atom_entry_data1), "raw_metadata": atom_entry_data1},
        {"metadata": parse_xml(atom_entry_data), "raw_metadata": atom_entry_data},
    ]

    for i, deposit_request in enumerate(deposit_requests):
        actual_metadata = deposit_request.metadata
        assert actual_metadata == expected_meta[i]["metadata"]
        assert deposit_request.raw_metadata == expected_meta[i]["raw_metadata"]
        assert bool(deposit_request.archive) is False
diff --git a/swh/deposit/tests/api/test_collection_post_metadata.py b/swh/deposit/tests/api/test_collection_post_metadata.py
index d6f5c727..d4bb0058 100644
--- a/swh/deposit/tests/api/test_collection_post_metadata.py
+++ b/swh/deposit/tests/api/test_collection_post_metadata.py
@@ -1,275 +1,275 @@
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Tests metadata is loaded when sent via a POST Col-IRI"""
from io import BytesIO
import attr
from django.urls import reverse
import pytest
from rest_framework import status
from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_LOAD_SUCCESS, APIConfig
from swh.deposit.models import Deposit
from swh.deposit.parsers import parse_xml
from swh.deposit.utils import compute_metadata_context
from swh.model.identifiers import SWHID, parse_swhid
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
MetadataTargetType,
RawExtrinsicMetadata,
)
from swh.storage.interface import PagedResult
def test_deposit_metadata_invalid(
    authenticated_client, deposit_collection, atom_dataset
):
    """Posting an entry with an invalid SWHID reference yields a bad request.

    The entry is posted on the collection IRI; the response is expected to be
    a 400 whose body mentions "Invalid SWHID reference".

    """
    # Note the stray space after "dir" in the identifier below.
    malformed_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
    entry_template = atom_dataset["entry-data-with-swhid"]
    payload = entry_template.format(swhid=malformed_swhid)
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    rejection = authenticated_client.post(
        collection_url,
        content_type="application/atom+xml;type=entry",
        data=payload,
    )

    assert rejection.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Invalid SWHID reference" in rejection.content
def test_deposit_metadata_fails_functional_checks(
    authenticated_client, deposit_collection, atom_dataset
):
    """Posting an entry failing the functional checks yields a bad request.

    The entry is built from the dataset item named
    "entry-data-with-swhid-fail-metadata-functional-checks"; the response is
    expected to be a 400 whose body mentions "Functional metadata checks
    failure".

    """
    reference_swhid = "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
    dataset_key = "entry-data-with-swhid-fail-metadata-functional-checks"
    payload = atom_dataset[dataset_key].format(swhid=reference_swhid)
    collection_url = reverse(COL_IRI, args=[deposit_collection.name])

    rejection = authenticated_client.post(
        collection_url,
        content_type="application/atom+xml;type=entry",
        data=payload,
    )

    assert rejection.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Functional metadata checks failure" in rejection.content
@pytest.mark.parametrize(
    "swhid,target_type",
    [
        (
            "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
            MetadataTargetType.CONTENT,
        ),
        (
            "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
            MetadataTargetType.DIRECTORY,
        ),
        (
            "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
            MetadataTargetType.REVISION,
        ),
        (
            "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
            MetadataTargetType.RELEASE,
        ),
        (
            "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
            MetadataTargetType.SNAPSHOT,
        ),
        (
            "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
            MetadataTargetType.CONTENT,
        ),
        (
            "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/",  # noqa
            MetadataTargetType.DIRECTORY,
        ),
        (
            "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
            MetadataTargetType.REVISION,
        ),
        (
            "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
            MetadataTargetType.RELEASE,
        ),
        (
            "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
            MetadataTargetType.SNAPSHOT,
        ),
    ],
)
def test_deposit_metadata_swhid(
    swhid,
    target_type,
    authenticated_client,
    deposit_collection,
    atom_dataset,
    swh_storage,
):
    """Posting a swhid reference is stored on raw extrinsic metadata storage

    The test is run for bare SWHIDs and for SWHIDs carrying qualifiers
    (origin, visit, anchor, path). For each of them it checks that:

    - the deposit is created (201) and immediately in the load-success status,
      with ``swhid`` set to the SWHID without qualifiers and ``swhid_context``
      set to the full SWHID as posted,
    - the metadata authority and fetcher read back from the storage match the
      ones built from the deposit client and the API configuration,
    - exactly one raw extrinsic metadata entry is stored for the target, whose
      discovery date equals the deposit completion date.

    """
    swhid_reference = parse_swhid(swhid)
    # Same identifier with its qualifiers dropped; it is used as the storage
    # target.
    swhid_core = attr.evolve(swhid_reference, metadata={})

    xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
    deposit_client = authenticated_client.deposit_client

    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=xml_data,
    )

    # Surface the response body when the deposit is unexpectedly refused.
    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
    response_content = parse_xml(BytesIO(response.content))

    # Ensure the deposit is finalized
    deposit_id = int(response_content["swh:deposit_id"])
    deposit = Deposit.objects.get(pk=deposit_id)
    assert isinstance(swhid_core, SWHID)
    assert deposit.swhid == str(swhid_core)
    assert deposit.swhid_context == str(swhid_reference)
    assert deposit.complete_date == deposit.reception_date
    assert deposit.complete_date is not None
    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS

    # Ensure metadata stored in the metadata storage is consistent
    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=deposit_client.provider_url,
        metadata={"name": deposit_client.last_name},
    )

    actual_authority = swh_storage.metadata_authority_get(
        MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url
    )
    assert actual_authority == metadata_authority

    config = APIConfig()
    metadata_fetcher = MetadataFetcher(
        name=config.tool["name"],
        version=config.tool["version"],
        metadata=config.tool["configuration"],
    )

    actual_fetcher = swh_storage.metadata_fetcher_get(
        config.tool["name"], config.tool["version"]
    )
    assert actual_fetcher == metadata_fetcher

    page_results = swh_storage.raw_extrinsic_metadata_get(
        target_type, swhid_core, metadata_authority
    )
    # Check the number of results before indexing into them, so that a missing
    # entry is reported as an assertion failure rather than as an IndexError.
    assert len(page_results.results) == 1
    assert page_results.next_page_token is None
    # The discovery date is set when the metadata is stored; reuse the stored
    # value to build the expected object.
    discovery_date = page_results.results[0].discovery_date

    object_type, metadata_context = compute_metadata_context(swhid_reference)
    assert page_results == PagedResult(
        results=[
            RawExtrinsicMetadata(
                type=object_type,
                target=swhid_core,
                discovery_date=discovery_date,
                authority=attr.evolve(metadata_authority, metadata=None),
                fetcher=attr.evolve(metadata_fetcher, metadata=None),
                format="sword-v2-atom-codemeta",
                metadata=xml_data.encode(),
                **metadata_context,
            )
        ],
        next_page_token=None,
    )
    assert deposit.complete_date == discovery_date
@pytest.mark.parametrize(
"url", ["https://gitlab.org/user/repo", "https://whatever.else/repo",]
)
def test_deposit_metadata_origin(
url, authenticated_client, deposit_collection, atom_dataset, swh_storage,
):
"""Posting an origin url reference is stored on raw extrinsic metadata storage

The deposit is expected to be created (201) and immediately in the
load-success status, with neither swhid nor swhid context set. Exactly one
raw extrinsic metadata entry is then expected in the storage, targeting the
origin url, with a discovery date equal to the deposit completion date.

"""
- xml_data = atom_dataset["entry-data-with-origin"].format(url=url)
+ xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url)
deposit_client = authenticated_client.deposit_client
response = authenticated_client.post(
reverse(COL_IRI, args=[deposit_collection.name]),
content_type="application/atom+xml;type=entry",
data=xml_data,
)
assert response.status_code == status.HTTP_201_CREATED
response_content = parse_xml(BytesIO(response.content))
# Ensure the deposit is finalized
deposit_id = int(response_content["swh:deposit_id"])
deposit = Deposit.objects.get(pk=deposit_id)
# we got no swhid as input so we cannot have those
assert deposit.swhid is None
assert deposit.swhid_context is None
assert deposit.complete_date == deposit.reception_date
assert deposit.complete_date is not None
assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
# Ensure metadata stored in the metadata storage is consistent
metadata_authority = MetadataAuthority(
type=MetadataAuthorityType.DEPOSIT_CLIENT,
url=deposit_client.provider_url,
metadata={"name": deposit_client.last_name},
)
actual_authority = swh_storage.metadata_authority_get(
MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url
)
assert actual_authority == metadata_authority
config = APIConfig()
metadata_fetcher = MetadataFetcher(
name=config.tool["name"],
version=config.tool["version"],
metadata=config.tool["configuration"],
)
actual_fetcher = swh_storage.metadata_fetcher_get(
config.tool["name"], config.tool["version"]
)
assert actual_fetcher == metadata_fetcher
# The stored metadata is looked up by origin url (no swhid is involved here)
page_results = swh_storage.raw_extrinsic_metadata_get(
MetadataTargetType.ORIGIN, url, metadata_authority
)
# NOTE(review): results[0] is read before the length assertion below, so an
# empty result would raise IndexError instead of failing that assertion.
discovery_date = page_results.results[0].discovery_date
assert len(page_results.results) == 1
assert page_results.next_page_token is None
# The expected entry reuses the stored discovery date and compares the
# authority and fetcher with their metadata reset to None.
assert page_results == PagedResult(
results=[
RawExtrinsicMetadata(
type=MetadataTargetType.ORIGIN,
target=url,
discovery_date=discovery_date,
authority=attr.evolve(metadata_authority, metadata=None),
fetcher=attr.evolve(metadata_fetcher, metadata=None),
format="sword-v2-atom-codemeta",
metadata=xml_data.encode(),
)
],
next_page_token=None,
)
assert deposit.complete_date == discovery_date
diff --git a/swh/deposit/tests/data/atom/entry-data-with-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
similarity index 81%
copy from swh/deposit/tests/data/atom/entry-data-with-origin.xml
copy to swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
index 0cc06a8b..45fda2f5 100644
--- a/swh/deposit/tests/data/atom/entry-data-with-origin.xml
+++ b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
@@ -1,13 +1,13 @@
Awesome Compiler
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
dudess
-
-
-
+
+
+
diff --git a/swh/deposit/tests/data/atom/entry-data-with-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-both-add-to-origin-and-external-id.xml
similarity index 73%
copy from swh/deposit/tests/data/atom/entry-data-with-origin.xml
copy to swh/deposit/tests/data/atom/entry-data-with-both-add-to-origin-and-external-id.xml
index 0cc06a8b..9c188f84 100644
--- a/swh/deposit/tests/data/atom/entry-data-with-origin.xml
+++ b/swh/deposit/tests/data/atom/entry-data-with-both-add-to-origin-and-external-id.xml
@@ -1,13 +1,14 @@
Awesome Compiler
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
dudess
+ foo
-
-
-
+
+
+
diff --git a/swh/deposit/tests/data/atom/entry-data-with-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-both-create-origin-and-add-to-origin.xml
similarity index 63%
rename from swh/deposit/tests/data/atom/entry-data-with-origin.xml
rename to swh/deposit/tests/data/atom/entry-data-with-both-create-origin-and-add-to-origin.xml
index 0cc06a8b..43780237 100644
--- a/swh/deposit/tests/data/atom/entry-data-with-origin.xml
+++ b/swh/deposit/tests/data/atom/entry-data-with-both-create-origin-and-add-to-origin.xml
@@ -1,13 +1,16 @@
Awesome Compiler
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
dudess
-
-
-
+
+
+
+
+
+