diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -9,6 +9,7 @@
import json
from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union
+import attr
from django.http import FileResponse, HttpResponse
from django.shortcuts import render
from django.urls import reverse
@@ -19,7 +20,18 @@
from rest_framework.request import Request
from rest_framework.views import APIView
+from swh.deposit.api.checks import check_metadata
+from swh.deposit.api.converters import convert_status_detail
+from swh.deposit.models import Deposit
+from swh.deposit.utils import compute_metadata_context
from swh.model import hashutil
+from swh.model.identifiers import SWHID, ValidationError
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ RawExtrinsicMetadata,
+)
from swh.scheduler.utils import create_oneshot_task_dict
from ..config import (
@@ -47,13 +59,14 @@
METHOD_NOT_ALLOWED,
NOT_FOUND,
PARSING_ERROR,
+ BadRequestError,
ParserError,
make_error_dict,
make_error_response,
make_error_response_from_dict,
)
-from ..models import Deposit, DepositClient, DepositCollection, DepositRequest
-from ..parsers import parse_xml
+from ..models import DepositClient, DepositCollection, DepositRequest
+from ..parsers import parse_swh_reference, parse_xml
ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"]
ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"]
@@ -603,6 +616,98 @@
"status": deposit.status,
}
+ def _store_metadata_deposit(
+ self,
+ deposit: Deposit,
+ swhid_reference: Union[str, SWHID],
+ metadata: Dict,
+ raw_metadata: bytes,
+ deposit_origin: Optional[str] = None,
+ ) -> Tuple[Union[SWHID, str], Union[SWHID, str], Deposit, DepositRequest]:
+ """When all user inputs pass the checks, this associates the raw_metadata to the
+ swhid_reference in the raw extrinsic metadata storage. In case of any issues,
+ a BadRequestError carrying the details is raised for the caller to report.
+
+ Checks:
+ - metadata pass the functional checks (check_metadata), done here
+ - metadata parsing and SWHID validity are not checked here (inputs are expected
+ to be already parsed by the caller)
+
+ Args:
+ deposit: Deposit reference
+ swhid_reference: The swhid or the origin to attach metadata information to
+ metadata: Full dict of metadata to check for validity (parsed out of
+ raw_metadata)
+ raw_metadata: The actual raw metadata to send in the storage metadata
+ deposit_origin: Origin url set as "origin" in the metadata context when
+ given (metadata update on a completed deposit); None for metadata-only
+
+ Raises:
+ BadRequestError in case of incorrect inputs from the deposit client
+ (e.g. functionally invalid metadata, ...)
+
+ Returns:
+ Tuple of core swhid (or the origin url when swhid_reference is a str), the
+ swhid_reference as given, deposit and deposit request
+ """
+ metadata_ok, error_details = check_metadata(metadata)
+ if not metadata_ok:
+ assert error_details, "Details should be set when a failure occurs"
+ raise BadRequestError(
+ "Functional metadata checks failure",
+ convert_status_detail(error_details),
+ )
+
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT_CLIENT,
+ url=deposit.client.provider_url,
+ metadata={"name": deposit.client.last_name},
+ )
+
+ metadata_fetcher = MetadataFetcher(
+ name=self.tool["name"],
+ version=self.tool["version"],
+ metadata=self.tool["configuration"],
+ )
+
+ # replace metadata within the deposit backend
+ deposit_request_data = {
+ METADATA_KEY: metadata,
+ RAW_METADATA_KEY: raw_metadata,
+ }
+
+ # actually add the metadata to the completed deposit
+ deposit_request = self._deposit_request_put(deposit, deposit_request_data)
+
+ object_type, metadata_context = compute_metadata_context(swhid_reference)
+ if deposit_origin: # metadata deposit update on completed deposit
+ metadata_context["origin"] = deposit_origin
+
+ swhid_core: Union[str, SWHID]
+ if isinstance(swhid_reference, str):
+ swhid_core = swhid_reference
+ else:
+ swhid_core = attr.evolve(swhid_reference, metadata={})
+
+ # store that metadata to the metadata storage
+ metadata_object = RawExtrinsicMetadata(
+ type=object_type,
+ target=swhid_core, # core swhid or origin
+ discovery_date=deposit_request.date,
+ authority=metadata_authority,
+ fetcher=metadata_fetcher,
+ format="sword-v2-atom-codemeta",
+ metadata=raw_metadata,
+ **metadata_context,
+ )
+
+ # write to metadata storage
+ self.storage_metadata.metadata_authority_add([metadata_authority])
+ self.storage_metadata.metadata_fetcher_add([metadata_fetcher])
+ self.storage_metadata.raw_extrinsic_metadata_add([metadata_object])
+
+ return (swhid_core, swhid_reference, deposit, deposit_request)
+
def _atom_entry(
self,
request: Request,
@@ -662,11 +767,13 @@
"If the body is empty, there is no metadata.",
)
- external_id = metadata.get("external_identifier", headers["slug"])
+ # Determine if we are in the metadata-only deposit case
+ try:
+ swhid = parse_swh_reference(metadata)
+ except ValidationError as e:
+ return make_error_dict(PARSING_ERROR, "Invalid SWHID reference", str(e),)
- # TODO: Determine if we are in the metadata-only deposit case. If it is, then
- # save deposit and deposit request typed 'metadata' and send metadata to the
- # metadata storage. Otherwise, do as existing deposit.
+ external_id = metadata.get("external_identifier", headers["slug"])
deposit = self._deposit_put(
request,
@@ -675,6 +782,29 @@
external_id=external_id,
)
+ if swhid is not None:
+ try:
+ swhid, swhid_ref, depo, depo_request = self._store_metadata_deposit(
+ deposit, swhid, metadata, raw_metadata
+ )
+ except BadRequestError as bad_request_error:
+ return bad_request_error.to_dict()
+
+ deposit.status = DEPOSIT_STATUS_LOAD_SUCCESS
+ if isinstance(swhid_ref, SWHID):
+ deposit.swhid = str(swhid)
+ deposit.swhid_context = str(swhid_ref)
+ deposit.complete_date = depo_request.date
+ deposit.reception_date = depo_request.date
+ deposit.save()
+
+ return {
+ "deposit_id": deposit.id,
+ "deposit_date": depo_request.date,
+ "status": deposit.status,
+ "archive": None,
+ }
+
self._deposit_request_put(
deposit,
{METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata},
diff --git a/swh/deposit/api/deposit_update.py b/swh/deposit/api/deposit_update.py
--- a/swh/deposit/api/deposit_update.py
+++ b/swh/deposit/api/deposit_update.py
@@ -8,29 +8,11 @@
from rest_framework import status
from rest_framework.request import Request
-from swh.deposit.api.checks import check_metadata
-from swh.deposit.api.converters import convert_status_detail
from swh.deposit.models import Deposit
from swh.model.identifiers import parse_swhid
-from swh.model.model import (
- MetadataAuthority,
- MetadataAuthorityType,
- MetadataFetcher,
- MetadataTargetType,
- RawExtrinsicMetadata,
-)
-from swh.storage import get_storage
-from swh.storage.interface import StorageInterface
-
-from ..config import (
- CONT_FILE_IRI,
- DEPOSIT_STATUS_LOAD_SUCCESS,
- EDIT_SE_IRI,
- EM_IRI,
- METADATA_KEY,
- RAW_METADATA_KEY,
-)
-from ..errors import BAD_REQUEST, ParserError, make_error_dict
+
+from ..config import CONT_FILE_IRI, DEPOSIT_STATUS_LOAD_SUCCESS, EDIT_SE_IRI, EM_IRI
+from ..errors import BAD_REQUEST, BadRequestError, ParserError, make_error_dict
from ..parsers import (
SWHAtomEntryParser,
SWHFileUploadTarParser,
@@ -125,12 +107,6 @@
parser_classes = (SWHMultiPartParser, SWHAtomEntryParser)
- def __init__(self):
- super().__init__()
- self.storage_metadata: StorageInterface = get_storage(
- **self.config["storage_metadata"]
- )
-
def restrict_access(
self, request: Request, headers: Dict, deposit: Deposit
) -> Dict[str, Any]:
@@ -229,56 +205,15 @@
"If the body is empty, there is no metadata.",
)
- metadata_ok, error_details = check_metadata(metadata)
- if not metadata_ok:
- assert error_details, "Details should be set when a failure occurs"
- return make_error_dict(
- BAD_REQUEST,
- "Functional metadata checks failure",
- convert_status_detail(error_details),
+ try:
+ _, _, deposit, deposit_request = self._store_metadata_deposit(
+ deposit, parse_swhid(swhid), metadata, raw_metadata, deposit.origin_url,
)
-
- metadata_authority = MetadataAuthority(
- type=MetadataAuthorityType.DEPOSIT_CLIENT,
- url=deposit.client.provider_url,
- metadata={"name": deposit.client.last_name},
- )
-
- metadata_fetcher = MetadataFetcher(
- name=self.tool["name"],
- version=self.tool["version"],
- metadata=self.tool["configuration"],
- )
-
- deposit_swhid = parse_swhid(swhid)
-
- # replace metadata within the deposit backend
- deposit_request_data = {
- METADATA_KEY: metadata,
- RAW_METADATA_KEY: raw_metadata,
- }
-
- # actually add the metadata to the completed deposit
- deposit_request = self._deposit_request_put(deposit, deposit_request_data)
- # store that metadata to the metadata storage
- metadata_object = RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
- target=deposit_swhid,
- discovery_date=deposit_request.date,
- authority=metadata_authority,
- fetcher=metadata_fetcher,
- format="sword-v2-atom-codemeta",
- metadata=raw_metadata,
- origin=deposit.origin_url,
- )
-
- # write to metadata storage
- self.storage_metadata.metadata_authority_add([metadata_authority])
- self.storage_metadata.metadata_fetcher_add([metadata_fetcher])
- self.storage_metadata.raw_extrinsic_metadata_add([metadata_object])
+ except BadRequestError as bad_request_error:
+ return bad_request_error.to_dict()
return {
- "deposit_id": deposit_id,
+ "deposit_id": deposit.id,
"deposit_date": deposit_request.date,
"status": deposit.status,
"archive": None,
diff --git a/swh/deposit/config.py b/swh/deposit/config.py
--- a/swh/deposit/config.py
+++ b/swh/deposit/config.py
@@ -10,6 +10,8 @@
from swh.deposit import __version__
from swh.scheduler import get_scheduler
from swh.scheduler.interface import SchedulerInterface
+from swh.storage import get_storage
+from swh.storage.interface import StorageInterface
# IRIs (Internationalized Resource identifier) sword 2.0 specified
EDIT_SE_IRI = "edit_se_iri"
@@ -101,3 +103,6 @@
"version": __version__,
"configuration": {"sword_version": "2"},
}
+ self.storage_metadata: StorageInterface = get_storage(
+ **self.config["storage_metadata"]
+ )
diff --git a/swh/deposit/errors.py b/swh/deposit/errors.py
--- a/swh/deposit/errors.py
+++ b/swh/deposit/errors.py
@@ -148,3 +148,17 @@
"""
error = make_error_dict(key, summary, verbose_description)
return make_error_response_from_dict(req, error["error"])
+
+
+class BadRequestError(ValueError):
+ """Represents a bad input from the deposit client; to_dict() renders it as a
+ BAD_REQUEST error dict built from the summary and the verbose description.
+ """
+
+ def __init__(self, summary, verbose_description):
+ self.key = BAD_REQUEST
+ self.summary = summary
+ self.verbose_description = verbose_description
+
+ def to_dict(self):
+ return make_error_dict(self.key, self.summary, self.verbose_description)
diff --git a/swh/deposit/tests/api/test_deposit_metadata.py b/swh/deposit/tests/api/test_deposit_metadata.py
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/api/test_deposit_metadata.py
@@ -0,0 +1,277 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from io import BytesIO
+
+import attr
+from django.urls import reverse
+import pytest
+from rest_framework import status
+
+from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_LOAD_SUCCESS, APIConfig
+from swh.deposit.models import Deposit
+from swh.deposit.parsers import parse_xml
+from swh.deposit.utils import compute_metadata_context
+from swh.model.identifiers import SWHID, parse_swhid
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
+)
+from swh.storage.interface import PagedResult
+
+
+def test_deposit_metadata_invalid(
+ authenticated_client, deposit_collection, atom_dataset
+):
+ """Posting an invalid swhid reference returns a bad request to the client
+
+ """
+ invalid_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
+ xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid)
+
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=xml_data,
+ HTTP_SLUG="external-id",
+ )
+ assert response.status_code == status.HTTP_400_BAD_REQUEST
+ assert b"Invalid SWHID reference" in response.content
+
+
+def test_deposit_metadata_fails_functional_checks(
+ authenticated_client, deposit_collection, atom_dataset
+):
+ """Posting functionally invalid metadata returns a bad request to the client
+
+ """
+ swhid = "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
+ invalid_xml_data = atom_dataset[
+ "entry-data-with-swhid-fail-metadata-functional-checks"
+ ].format(swhid=swhid)
+
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=invalid_xml_data,
+ HTTP_SLUG="external-id",
+ )
+ assert response.status_code == status.HTTP_400_BAD_REQUEST
+ assert b"Functional metadata checks failure" in response.content
+
+
+@pytest.mark.parametrize(
+ "swhid,target_type",
+ [
+ (
+ "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.CONTENT,
+ ),
+ (
+ "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.DIRECTORY,
+ ),
+ (
+ "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.REVISION,
+ ),
+ (
+ "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.RELEASE,
+ ),
+ (
+ "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.SNAPSHOT,
+ ),
+ (
+ "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.CONTENT,
+ ),
+ (
+ "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa
+ MetadataTargetType.DIRECTORY,
+ ),
+ (
+ "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.REVISION,
+ ),
+ (
+ "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.RELEASE,
+ ),
+ (
+ "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.SNAPSHOT,
+ ),
+ ],
+)
+def test_deposit_metadata_swhid(
+ swhid,
+ target_type,
+ authenticated_client,
+ deposit_collection,
+ atom_dataset,
+ swh_storage,
+):
+ """Posting a swhid reference stores the metadata in raw extrinsic metadata storage
+
+ """
+ swhid_reference = parse_swhid(swhid)
+ swhid_core = attr.evolve(swhid_reference, metadata={})  # context dropped
+
+ xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
+ deposit_client = authenticated_client.deposit_client
+
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=xml_data,
+ HTTP_SLUG="external-id",
+ )
+
+ assert response.status_code == status.HTTP_201_CREATED
+ response_content = parse_xml(BytesIO(response.content))
+
+ # Ensure the deposit is finalized
+ deposit_id = int(response_content["deposit_id"])
+ deposit = Deposit.objects.get(pk=deposit_id)
+ assert isinstance(swhid_core, SWHID)
+ assert deposit.swhid == str(swhid_core)
+ assert deposit.swhid_context == str(swhid_reference)
+ assert deposit.complete_date == deposit.reception_date
+ assert deposit.complete_date is not None
+ assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
+
+ # Ensure metadata stored in the metadata storage is consistent
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT_CLIENT,
+ url=deposit_client.provider_url,
+ metadata={"name": deposit_client.last_name},
+ )
+
+ actual_authority = swh_storage.metadata_authority_get(
+ MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url
+ )
+ assert actual_authority == metadata_authority
+
+ config = APIConfig()
+ metadata_fetcher = MetadataFetcher(
+ name=config.tool["name"],
+ version=config.tool["version"],
+ metadata=config.tool["configuration"],
+ )
+
+ actual_fetcher = swh_storage.metadata_fetcher_get(
+ config.tool["name"], config.tool["version"]
+ )
+ assert actual_fetcher == metadata_fetcher
+
+ page_results = swh_storage.raw_extrinsic_metadata_get(
+ target_type, swhid_core, metadata_authority
+ )
+ discovery_date = page_results.results[0].discovery_date
+
+ assert len(page_results.results) == 1
+ assert page_results.next_page_token is None
+
+ object_type, metadata_context = compute_metadata_context(swhid_reference)
+ assert page_results == PagedResult(
+ results=[
+ RawExtrinsicMetadata(
+ type=object_type,
+ target=swhid_core,
+ discovery_date=discovery_date,
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="sword-v2-atom-codemeta",
+ metadata=xml_data.encode(),
+ **metadata_context,
+ )
+ ],
+ next_page_token=None,
+ )
+ assert deposit.complete_date == discovery_date
+
+
+@pytest.mark.parametrize(
+ "url", ["https://gitlab.org/user/repo", "https://whatever.else/repo",]
+)
+def test_deposit_metadata_origin(
+ url, authenticated_client, deposit_collection, atom_dataset, swh_storage,
+):
+ """Posting an origin url stores the metadata in raw extrinsic metadata storage
+
+ """
+ xml_data = atom_dataset["entry-data-with-origin"].format(url=url)
+ deposit_client = authenticated_client.deposit_client
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=xml_data,
+ HTTP_SLUG="external-id",
+ )
+
+ assert response.status_code == status.HTTP_201_CREATED
+ response_content = parse_xml(BytesIO(response.content))
+ # Ensure the deposit is finalized
+ deposit_id = int(response_content["deposit_id"])
+ deposit = Deposit.objects.get(pk=deposit_id)
+ # we got no swhid as input so we cannot have those
+ assert deposit.swhid is None
+ assert deposit.swhid_context is None
+ assert deposit.complete_date == deposit.reception_date
+ assert deposit.complete_date is not None
+ assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
+
+ # Ensure metadata stored in the metadata storage is consistent
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT_CLIENT,
+ url=deposit_client.provider_url,
+ metadata={"name": deposit_client.last_name},
+ )
+
+ actual_authority = swh_storage.metadata_authority_get(
+ MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url
+ )
+ assert actual_authority == metadata_authority
+
+ config = APIConfig()
+ metadata_fetcher = MetadataFetcher(
+ name=config.tool["name"],
+ version=config.tool["version"],
+ metadata=config.tool["configuration"],
+ )
+
+ actual_fetcher = swh_storage.metadata_fetcher_get(
+ config.tool["name"], config.tool["version"]
+ )
+ assert actual_fetcher == metadata_fetcher
+
+ page_results = swh_storage.raw_extrinsic_metadata_get(
+ MetadataTargetType.ORIGIN, url, metadata_authority
+ )
+ discovery_date = page_results.results[0].discovery_date
+
+ assert len(page_results.results) == 1
+ assert page_results.next_page_token is None
+
+ assert page_results == PagedResult(
+ results=[
+ RawExtrinsicMetadata(
+ type=MetadataTargetType.ORIGIN,
+ target=url,
+ discovery_date=discovery_date,
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="sword-v2-atom-codemeta",
+ metadata=xml_data.encode(),
+ )
+ ],
+ next_page_token=None,
+ )
+ assert deposit.complete_date == discovery_date
diff --git a/swh/deposit/tests/api/test_parsers.py b/swh/deposit/tests/api/test_parsers.py
--- a/swh/deposit/tests/api/test_parsers.py
+++ b/swh/deposit/tests/api/test_parsers.py
@@ -187,19 +187,8 @@
@pytest.fixture
-def xml_with_swhid():
- xml_data = """
-
-
-
-
-
-
-
- """
- return xml_data.strip()
+def xml_with_swhid(atom_dataset):
+ return atom_dataset["entry-data-with-swhid"]  # entry shared with test_deposit_metadata.py
@pytest.mark.parametrize(
diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py
--- a/swh/deposit/tests/conftest.py
+++ b/swh/deposit/tests/conftest.py
@@ -204,15 +204,19 @@
return APIClient() # <- drf's client
-@pytest.yield_fixture
+@pytest.fixture
def authenticated_client(client, deposit_user):
"""Returned a logged client
+ This also patches the client instance to keep a reference to the associated
+ deposit_user (exposed as client.deposit_client).
+
"""
_token = "%s:%s" % (deposit_user.username, TEST_USER["password"])
token = base64.b64encode(_token.encode("utf-8"))
authorization = "Basic %s" % token.decode("utf-8")
client.credentials(HTTP_AUTHORIZATION=authorization)
+ client.deposit_client = deposit_user
yield client
client.logout()
diff --git a/swh/deposit/tests/data/atom/entry-data-with-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-origin.xml
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/atom/entry-data-with-origin.xml
@@ -0,0 +1,13 @@
+
+
+ Awesome Compiler
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+ dudess
+
+
+
+
+
+
diff --git a/swh/deposit/tests/data/atom/entry-data-with-swhid-fail-metadata-functional-checks.xml b/swh/deposit/tests/data/atom/entry-data-with-swhid-fail-metadata-functional-checks.xml
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/atom/entry-data-with-swhid-fail-metadata-functional-checks.xml
@@ -0,0 +1,13 @@
+
+
+
+ hal
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+ 2017-10-07T15:17:08Z
+
+
+
+
+
+
diff --git a/swh/deposit/tests/data/atom/entry-data-with-swhid.xml b/swh/deposit/tests/data/atom/entry-data-with-swhid.xml
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/atom/entry-data-with-swhid.xml
@@ -0,0 +1,13 @@
+
+
+ Awesome Compiler
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+ dudess
+
+
+
+
+
+
diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py
--- a/swh/deposit/tests/test_utils.py
+++ b/swh/deposit/tests/test_utils.py
@@ -1,13 +1,16 @@
-# Copyright (C) 2018-2019 The Software Heritage developers
+# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Union
from unittest.mock import patch
import pytest
from swh.deposit import utils
+from swh.model.identifiers import SWHID, parse_swhid
+from swh.model.model import MetadataTargetType
def test_merge():
@@ -139,3 +142,59 @@
expected_date = "2017-01-01 00:00:00+00:00"
assert str(actual_date) == expected_date
+
+
+@pytest.mark.parametrize(
+ "swhid_or_origin,expected_type,expected_metadata_context",
+ [
+ ("https://something", MetadataTargetType.ORIGIN, {"origin": None}),
+ (
+ "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.CONTENT,
+ {"origin": None},
+ ),
+ (
+ "swh:1:snp:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=http://blah",
+ MetadataTargetType.SNAPSHOT,
+ {"origin": "http://blah", "path": None},
+ ),
+ (
+ "swh:1:dir:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;path=/path",
+ MetadataTargetType.DIRECTORY,
+ {"origin": None, "path": b"/path"},
+ ),
+ (
+ "swh:1:rev:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;visit=swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49", # noqa
+ MetadataTargetType.REVISION,
+ {
+ "origin": None,
+ "path": None,
+ "snapshot": parse_swhid(
+ "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49"
+ ),
+ },
+ ),
+ (
+ "swh:1:rel:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:dir:41b5c8cc985d190b5a7ef4878128ebfdc2358f49", # noqa
+ MetadataTargetType.RELEASE,
+ {
+ "origin": None,
+ "path": None,
+ "directory": parse_swhid(
+ "swh:1:dir:41b5c8cc985d190b5a7ef4878128ebfdc2358f49"
+ ),
+ },
+ ),
+ ],
+)
+def test_compute_metadata_context(
+ swhid_or_origin: Union[str, SWHID], expected_type, expected_metadata_context
+):
+ if expected_type != MetadataTargetType.ORIGIN:  # swhid cases are given as str
+ assert isinstance(swhid_or_origin, str)
+ swhid_or_origin = parse_swhid(swhid_or_origin)
+
+ object_type, metadata_context = utils.compute_metadata_context(swhid_or_origin)
+
+ assert object_type == expected_type
+ assert metadata_context == expected_metadata_context
diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py
--- a/swh/deposit/utils.py
+++ b/swh/deposit/utils.py
@@ -1,13 +1,15 @@
-# Copyright (C) 2018-2019 The Software Heritage developers
+# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from types import GeneratorType
+from typing import Any, Dict, Tuple, Union
import iso8601
-from swh.model.identifiers import normalize_timestamp
+from swh.model.identifiers import SWHID, normalize_timestamp, parse_swhid
+from swh.model.model import MetadataTargetType
def merge(*dicts):
@@ -81,3 +83,37 @@
date = iso8601.parse_date(date)
return normalize_timestamp(date)
+
+
+def compute_metadata_context(
+ swhid_reference: Union[SWHID, str]
+) -> Tuple[MetadataTargetType, Dict[str, Any]]:
+ """Given a SWHID object or a plain string (treated as an origin), determine
+ the target type and the metadata context as a dict.
+
+ The parse_swhid calls within are not expected to raise (because they should have
+ been caught early on).
+ """
+ metadata_context: Dict[str, Any] = {"origin": None}
+ if isinstance(swhid_reference, SWHID):
+ object_type = MetadataTargetType(swhid_reference.object_type)
+ assert object_type != MetadataTargetType.ORIGIN
+
+ if swhid_reference.metadata:
+ path = swhid_reference.metadata.get("path")
+ metadata_context = {
+ "origin": swhid_reference.metadata.get("origin"),
+ "path": path.encode() if path else None,
+ }
+ snapshot = swhid_reference.metadata.get("visit")
+ if snapshot:
+ metadata_context["snapshot"] = parse_swhid(snapshot)
+
+ anchor = swhid_reference.metadata.get("anchor")
+ if anchor:
+ anchor_swhid = parse_swhid(anchor)
+ metadata_context[anchor_swhid.object_type] = anchor_swhid
+ else:
+ object_type = MetadataTargetType.ORIGIN
+
+ return object_type, metadata_context