diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -3,6 +3,8 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import attr
+
from abc import ABCMeta, abstractmethod
import datetime
import hashlib
@@ -19,7 +21,18 @@
from rest_framework.request import Request
from rest_framework.views import APIView
+from swh.deposit.api.checks import check_metadata
+from swh.deposit.api.converters import convert_status_detail
+from swh.deposit.models import Deposit
from swh.model import hashutil
+from swh.model.identifiers import SWHID, ValidationError
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
+)
from swh.scheduler.utils import create_oneshot_task_dict
from ..config import (
@@ -52,8 +65,8 @@
make_error_response,
make_error_response_from_dict,
)
-from ..models import Deposit, DepositClient, DepositCollection, DepositRequest
-from ..parsers import parse_xml
+from ..models import DepositClient, DepositCollection, DepositRequest
+from ..parsers import parse_swh_reference, parse_xml
ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"]
ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"]
@@ -603,6 +616,111 @@
"status": deposit.status,
}
+ def _store_metadata_deposit(
+ self,
+ deposit: Deposit,
+ swhid_reference: Union[str, SWHID],
+ metadata: Dict,
+ raw_metadata: bytes,
+ with_deposit_origin: bool = False,
+ ) -> Dict:
+ """Store a metadata-only deposit and write it to the metadata storage
+
+ Checks:
+ - The metadata received is not empty and passes the functional checks.
+ - The SWHID is assumed valid (presumably parsed by the caller; confirm)
+
+ """
+ if not metadata:
+ return make_error_dict(
+ BAD_REQUEST,
+ "Empty body request is not supported",
+ "Atom entry deposit is supposed to send for metadata. "
+ "If the body is empty, there is no metadata.",
+ )
+
+ metadata_ok, error_details = check_metadata(metadata)
+ if not metadata_ok:
+ assert error_details, "Details should be set when a failure occurs"
+ return make_error_dict(
+ BAD_REQUEST,
+ "Functional metadata checks failure",
+ convert_status_detail(error_details),
+ )
+
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT_CLIENT,
+ url=deposit.client.provider_url,
+ metadata={"name": deposit.client.last_name},
+ )
+
+ metadata_fetcher = MetadataFetcher(
+ name=self.tool["name"],
+ version=self.tool["version"],
+ metadata=self.tool["configuration"],
+ )
+
+ # metadata (parsed and raw) to record within the deposit backend
+ deposit_request_data = {
+ METADATA_KEY: metadata,
+ RAW_METADATA_KEY: raw_metadata,
+ }
+
+ # actually add the metadata to the deposit (new or already completed)
+ deposit_request = self._deposit_request_put(deposit, deposit_request_data)
+
+ map_type = {
+ "content": MetadataTargetType.CONTENT,
+ "directory": MetadataTargetType.DIRECTORY,
+ "revision": MetadataTargetType.REVISION,
+ "release": MetadataTargetType.RELEASE,
+ "snapshot": MetadataTargetType.SNAPSHOT,
+ "origin": MetadataTargetType.ORIGIN,
+ }
+
+ metadata_d = {"origin": None}
+ if isinstance(swhid_reference, SWHID):
+ object_type = map_type[swhid_reference.object_type]
+ if swhid_reference.metadata:
+ path = swhid_reference.metadata.get("path")
+ metadata_d = {
+ "origin": swhid_reference.metadata.get("origin"),
+ # "visit": swhid_reference.metadata.get("visit"),
+ "path": path.encode() if path else None,
+ # "...": swhid_reference.metadata.get("anchor")
+ }
+ # Make the swhid a core swhid (drop its contextual metadata)
+ swhid_reference = attr.evolve(swhid_reference, metadata={})
+ else:
+ object_type = MetadataTargetType.ORIGIN
+
+ if with_deposit_origin: # metadata deposit update on completed deposit
+ metadata_d["origin"] = deposit.origin_url
+
+ # build the metadata object to store in the metadata storage
+ metadata_object = RawExtrinsicMetadata(
+ type=object_type,
+ target=swhid_reference,
+ discovery_date=deposit_request.date,
+ authority=metadata_authority,
+ fetcher=metadata_fetcher,
+ format="sword-v2-atom-codemeta",
+ metadata=raw_metadata,
+ **metadata_d
+ )
+
+ # write to metadata storage
+ self.storage_metadata.metadata_authority_add([metadata_authority])
+ self.storage_metadata.metadata_fetcher_add([metadata_fetcher])
+ self.storage_metadata.raw_extrinsic_metadata_add([metadata_object])
+
+ return {
+ "deposit_id": deposit.id,
+ "deposit_date": deposit_request.date,
+ "status": deposit.status,
+ "archive": None,
+ }
+
def _atom_entry(
self,
request: Request,
@@ -662,11 +780,13 @@
"If the body is empty, there is no metadata.",
)
- external_id = metadata.get("external_identifier", headers["slug"])
+ # Determine if we are in the metadata-only deposit case
+ try:
+ swhid = parse_swh_reference(metadata)
+ except ValidationError as e:
+ return make_error_dict(PARSING_ERROR, "Invalid SWHID reference", str(e),)
- # TODO: Determine if we are in the metadata-only deposit case. If it is, then
- # save deposit and deposit request typed 'metadata' and send metadata to the
- # metadata storage. Otherwise, do as existing deposit.
+ external_id = metadata.get("external_identifier", headers["slug"])
deposit = self._deposit_put(
request,
@@ -675,6 +795,9 @@
external_id=external_id,
)
+ if swhid is not None:
+ return self._store_metadata_deposit(deposit, swhid, metadata, raw_metadata)
+
self._deposit_request_put(
deposit,
{METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata},
diff --git a/swh/deposit/api/deposit_update.py b/swh/deposit/api/deposit_update.py
--- a/swh/deposit/api/deposit_update.py
+++ b/swh/deposit/api/deposit_update.py
@@ -8,28 +8,10 @@
from rest_framework import status
from rest_framework.request import Request
-from swh.deposit.api.checks import check_metadata
-from swh.deposit.api.converters import convert_status_detail
from swh.deposit.models import Deposit
from swh.model.identifiers import parse_swhid
-from swh.model.model import (
- MetadataAuthority,
- MetadataAuthorityType,
- MetadataFetcher,
- MetadataTargetType,
- RawExtrinsicMetadata,
-)
-from swh.storage import get_storage
-from swh.storage.interface import StorageInterface
-
-from ..config import (
- CONT_FILE_IRI,
- DEPOSIT_STATUS_LOAD_SUCCESS,
- EDIT_SE_IRI,
- EM_IRI,
- METADATA_KEY,
- RAW_METADATA_KEY,
-)
+
+from ..config import CONT_FILE_IRI, DEPOSIT_STATUS_LOAD_SUCCESS, EDIT_SE_IRI, EM_IRI
from ..errors import BAD_REQUEST, ParserError, make_error_dict
from ..parsers import (
SWHAtomEntryParser,
@@ -125,12 +107,6 @@
parser_classes = (SWHMultiPartParser, SWHAtomEntryParser)
- def __init__(self):
- super().__init__()
- self.storage_metadata: StorageInterface = get_storage(
- **self.config["storage_metadata"]
- )
-
def restrict_access(
self, request: Request, headers: Dict, deposit: Deposit
) -> Dict[str, Any]:
@@ -221,69 +197,14 @@
"Please ensure your metadata file is correctly formatted.",
)
- if not metadata:
- return make_error_dict(
- BAD_REQUEST,
- "Empty body request is not supported",
- "Atom entry deposit is supposed to send for metadata. "
- "If the body is empty, there is no metadata.",
- )
-
- metadata_ok, error_details = check_metadata(metadata)
- if not metadata_ok:
- assert error_details, "Details should be set when a failure occurs"
- return make_error_dict(
- BAD_REQUEST,
- "Functional metadata checks failure",
- convert_status_detail(error_details),
- )
-
- metadata_authority = MetadataAuthority(
- type=MetadataAuthorityType.DEPOSIT_CLIENT,
- url=deposit.client.provider_url,
- metadata={"name": deposit.client.last_name},
+ return self._store_metadata_deposit(
+ deposit,
+ parse_swhid(swhid),
+ metadata,
+ raw_metadata,
+ with_deposit_origin=True,
)
- metadata_fetcher = MetadataFetcher(
- name=self.tool["name"],
- version=self.tool["version"],
- metadata=self.tool["configuration"],
- )
-
- deposit_swhid = parse_swhid(swhid)
-
- # replace metadata within the deposit backend
- deposit_request_data = {
- METADATA_KEY: metadata,
- RAW_METADATA_KEY: raw_metadata,
- }
-
- # actually add the metadata to the completed deposit
- deposit_request = self._deposit_request_put(deposit, deposit_request_data)
- # store that metadata to the metadata storage
- metadata_object = RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
- target=deposit_swhid,
- discovery_date=deposit_request.date,
- authority=metadata_authority,
- fetcher=metadata_fetcher,
- format="sword-v2-atom-codemeta",
- metadata=raw_metadata,
- origin=deposit.origin_url,
- )
-
- # write to metadata storage
- self.storage_metadata.metadata_authority_add([metadata_authority])
- self.storage_metadata.metadata_fetcher_add([metadata_fetcher])
- self.storage_metadata.raw_extrinsic_metadata_add([metadata_object])
-
- return {
- "deposit_id": deposit_id,
- "deposit_date": deposit_request.date,
- "status": deposit.status,
- "archive": None,
- }
-
def process_post(
self,
request,
diff --git a/swh/deposit/config.py b/swh/deposit/config.py
--- a/swh/deposit/config.py
+++ b/swh/deposit/config.py
@@ -10,6 +10,8 @@
from swh.deposit import __version__
from swh.scheduler import get_scheduler
from swh.scheduler.interface import SchedulerInterface
+from swh.storage import get_storage
+from swh.storage.interface import StorageInterface
# IRIs (Internationalized Resource identifier) sword 2.0 specified
EDIT_SE_IRI = "edit_se_iri"
@@ -101,3 +103,6 @@
"version": __version__,
"configuration": {"sword_version": "2"},
}
+ self.storage_metadata: StorageInterface = get_storage(
+ **self.config["storage_metadata"]
+ )
diff --git a/swh/deposit/tests/api/test_deposit_metadata.py b/swh/deposit/tests/api/test_deposit_metadata.py
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/api/test_deposit_metadata.py
@@ -0,0 +1,169 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import attr
+from django.urls import reverse
+import pytest
+from rest_framework import status
+
+from swh.deposit.config import COL_IRI, APIConfig
+from swh.model.identifiers import parse_swhid
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
+)
+from swh.storage.interface import PagedResult
+
+
+def test_deposit_metadata_invalid(
+ authenticated_client, deposit_collection, atom_dataset
+):
+ """Posting an invalid swhid reference returns a 400 error to the client
+
+ """
+ invalid_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
+ xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid)
+
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=xml_data,
+ HTTP_SLUG="external-id",
+ )
+ assert response.status_code == status.HTTP_400_BAD_REQUEST
+ assert b"Invalid SWHID reference" in response.content
+
+
+@pytest.mark.parametrize(
+ "swhid,target_type",
+ [
+ (
+ "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.CONTENT,
+ ),
+ (
+ "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.DIRECTORY,
+ ),
+ (
+ "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.REVISION,
+ ),
+ (
+ "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.RELEASE,
+ ),
+ (
+ "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.SNAPSHOT,
+ ),
+ (
+ "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.CONTENT,
+ ),
+ (
+ "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa
+ MetadataTargetType.DIRECTORY,
+ ),
+ (
+ "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.REVISION,
+ ),
+ (
+ "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.RELEASE,
+ ),
+ (
+ "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.SNAPSHOT,
+ ),
+ ],
+)
+def test_deposit_metadata_swhid(
+ swhid,
+ target_type,
+ authenticated_client,
+ deposit_collection,
+ atom_dataset,
+ swh_storage,
+):
+ """Posting a swhid reference stores its metadata in the metadata storage
+
+ """
+ swhid_reference = parse_swhid(swhid)
+ swhid_core = attr.evolve(swhid_reference, metadata={})
+
+ xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
+ deposit_client = authenticated_client.deposit_client
+
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=xml_data,
+ HTTP_SLUG="external-id",
+ )
+
+ assert response.status_code == status.HTTP_201_CREATED
+
+ # Ensure metadata stored in the metadata storage is consistent
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT_CLIENT,
+ url=deposit_client.provider_url,
+ metadata={"name": deposit_client.last_name},
+ )
+
+ actual_authority = swh_storage.metadata_authority_get(
+ MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url
+ )
+ assert actual_authority == metadata_authority
+
+ config = APIConfig()
+ metadata_fetcher = MetadataFetcher(
+ name=config.tool["name"],
+ version=config.tool["version"],
+ metadata=config.tool["configuration"],
+ )
+
+ actual_fetcher = swh_storage.metadata_fetcher_get(
+ config.tool["name"], config.tool["version"]
+ )
+ assert actual_fetcher == metadata_fetcher
+
+ page_results = swh_storage.raw_extrinsic_metadata_get(
+ target_type, swhid_core, metadata_authority
+ )
+ # check the result count first so an empty page fails on the assertion
+ assert len(page_results.results) == 1
+ assert page_results.next_page_token is None
+ discovery_date = page_results.results[0].discovery_date
+
+ metadata_dict = {}
+ if swhid_reference.metadata:
+ path = swhid_reference.metadata.get("path")
+ metadata_dict = {
+ "origin": swhid_reference.metadata.get("origin"),
+ # "visit": swhid_reference.metadata.get("visit"), # type is int, we got a swhid ¯\_(ツ)_/¯ # noqa
+ "path": path.encode() if path else None,
+ # "...": swhid_reference.metadata.get("anchor") # anchor is a swhid... # noqa
+ }
+
+ assert page_results == PagedResult(
+ results=[
+ RawExtrinsicMetadata(
+ type=target_type,
+ target=swhid_core,
+ discovery_date=discovery_date,
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="sword-v2-atom-codemeta",
+ metadata=xml_data.encode(),
+ **metadata_dict
+ )
+ ],
+ next_page_token=None,
+ )
diff --git a/swh/deposit/tests/api/test_parsers.py b/swh/deposit/tests/api/test_parsers.py
--- a/swh/deposit/tests/api/test_parsers.py
+++ b/swh/deposit/tests/api/test_parsers.py
@@ -187,19 +187,8 @@
@pytest.fixture
-def xml_with_swhid():
- xml_data = """
-
-
-
-
-
-
-
- """
- return xml_data.strip()
+def xml_with_swhid(atom_dataset):
+ return atom_dataset["entry-data-with-swhid"]
@pytest.mark.parametrize(
diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py
--- a/swh/deposit/tests/conftest.py
+++ b/swh/deposit/tests/conftest.py
@@ -204,15 +204,19 @@
return APIClient() # <- drf's client
-@pytest.yield_fixture
+@pytest.fixture
def authenticated_client(client, deposit_user):
"""Returned a logged client
+ This also patches the client instance to keep a reference to the associated
+ deposit_user.
+
"""
_token = "%s:%s" % (deposit_user.username, TEST_USER["password"])
token = base64.b64encode(_token.encode("utf-8"))
authorization = "Basic %s" % token.decode("utf-8")
client.credentials(HTTP_AUTHORIZATION=authorization)
+ client.deposit_client = deposit_user
yield client
client.logout()
diff --git a/swh/deposit/tests/data/atom/entry-data-with-swhid.xml b/swh/deposit/tests/data/atom/entry-data-with-swhid.xml
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/atom/entry-data-with-swhid.xml
@@ -0,0 +1,13 @@
+
+
+ Awesome Compiler
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+ dudess
+
+
+
+
+
+