Page MenuHomeSoftware Heritage

D5231.id18778.diff
No OneTemporary

D5231.id18778.diff

diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -27,7 +27,12 @@
from swh.deposit.models import Deposit
from swh.deposit.utils import compute_metadata_context
from swh.model import hashutil
-from swh.model.identifiers import ExtendedSWHID, QualifiedSWHID, ValidationError
+from swh.model.identifiers import (
+ ExtendedObjectType,
+ ExtendedSWHID,
+ QualifiedSWHID,
+ ValidationError,
+)
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
@@ -681,7 +686,8 @@
target_swhid = extended_swhid_from_qualified(swhid_reference)
- # store that metadata to the metadata storage
+ self._check_swhid_in_archive(target_swhid)
+
metadata_object = RawExtrinsicMetadata(
target=target_swhid, # core swhid or origin
discovery_date=deposit_request.date,
@@ -699,6 +705,45 @@
return (target_swhid, deposit, deposit_request)
+ def _check_swhid_in_archive(self, target_swhid: ExtendedSWHID) -> None:
+ """Check the target object already exists in the archive,
+ and raise a BAD_REQUEST DepositError if it does not."""
+ if target_swhid.object_type in (ExtendedObjectType.CONTENT,):
+ if list(
+ self.storage.content_missing_per_sha1_git([target_swhid.object_id])
+ ):
+ raise DepositError(
+ BAD_REQUEST,
+ f"Cannot load metadata on {target_swhid}, this content "
+ f"object does not exist in the archive (yet?).",
+ )
+ elif target_swhid.object_type in (
+ ExtendedObjectType.DIRECTORY,
+ ExtendedObjectType.REVISION,
+ ExtendedObjectType.RELEASE,
+ ExtendedObjectType.SNAPSHOT,
+ ):
+ target_type_name = target_swhid.object_type.name.lower()
+ method = getattr(self.storage, target_type_name + "_missing")
+ if list(method([target_swhid.object_id])):
+ raise DepositError(
+ BAD_REQUEST,
+ f"Cannot load metadata on {target_swhid}, this {target_type_name} "
+ f"object does not exist in the archive (yet?).",
+ )
+ elif target_swhid.object_type in (ExtendedObjectType.ORIGIN,):
+ if None in list(self.storage.origin_get_by_sha1([target_swhid.object_id])):
+ raise DepositError(
+ BAD_REQUEST,
+ "Cannot load metadata on origin, it is not (yet?) known to the "
+ "archive.",
+ )
+ else:
+ raise DepositError(
+ BAD_REQUEST,
+ f"Cannot deposit metadata on {target_swhid.object_type.value}.",
+ )
+
def _atom_entry(
self,
request: Request,
diff --git a/swh/deposit/config.py b/swh/deposit/config.py
--- a/swh/deposit/config.py
+++ b/swh/deposit/config.py
@@ -104,6 +104,7 @@
"version": __version__,
"configuration": {"sword_version": "2"},
}
+ self.storage: StorageInterface = get_storage(**self.config["storage"])
self.storage_metadata: StorageInterface = get_storage(
**self.config["storage_metadata"]
)
diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py
--- a/swh/deposit/tests/api/test_collection_post_atom.py
+++ b/swh/deposit/tests/api/test_collection_post_atom.py
@@ -7,6 +7,7 @@
from io import BytesIO
import uuid
+import warnings
import attr
from django.urls import reverse_lazy as reverse
@@ -23,7 +24,14 @@
from swh.deposit.parsers import parse_xml
from swh.deposit.tests.common import post_atom
from swh.deposit.utils import compute_metadata_context, extended_swhid_from_qualified
-from swh.model.identifiers import QualifiedSWHID
+from swh.model.hypothesis_strategies import (
+ directories,
+ present_contents,
+ releases,
+ revisions,
+ snapshots,
+)
+from swh.model.identifiers import ObjectType, QualifiedSWHID
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
@@ -34,6 +42,30 @@
from swh.storage.interface import PagedResult
+def _insert_object(swh_storage, swhid):
+ """Insert an object with the given swhid in the archive"""
+ if swhid.object_type == ObjectType.CONTENT:
+ with warnings.catch_warnings():
+ # hypothesis doesn't like us using .example(), but we know what we're doing
+ warnings.simplefilter("ignore")
+ obj = present_contents().example()
+ swh_storage.content_add([attr.evolve(obj, sha1_git=swhid.object_id)])
+ else:
+ object_type_name = swhid.object_type.name.lower()
+ strategy = {
+ "directory": directories,
+ "revision": revisions,
+ "release": releases,
+ "snapshot": snapshots,
+ }[object_type_name]
+ method = getattr(swh_storage, object_type_name + "_add")
+ with warnings.catch_warnings():
+ # hypothesis doesn't like us using .example(), but we know what we're doing
+ warnings.simplefilter("ignore")
+ obj = strategy().example()
+ method([attr.evolve(obj, id=swhid.object_id)])
+
+
def test_post_deposit_atom_201_even_with_decimal(
authenticated_client, deposit_collection, atom_dataset
):
@@ -478,13 +510,15 @@
xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
deposit_client = authenticated_client.deposit_client
+ _insert_object(swh_storage, swhid_reference)
+
response = post_atom(
authenticated_client,
reverse(COL_IRI, args=[deposit_collection.name]),
data=xml_data,
)
- assert response.status_code == status.HTTP_201_CREATED
+ assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
response_content = parse_xml(BytesIO(response.content))
# Ensure the deposit is finalized
@@ -558,13 +592,14 @@
xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url)
origin_swhid = Origin(url).swhid()
deposit_client = authenticated_client.deposit_client
+ swh_storage.origin_add([Origin(url)])
response = post_atom(
authenticated_client,
reverse(COL_IRI, args=[deposit_collection.name]),
data=xml_data,
)
- assert response.status_code == status.HTTP_201_CREATED
+ assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
response_content = parse_xml(BytesIO(response.content))
# Ensure the deposit is finalized
deposit_id = int(response_content["swh:deposit_id"])
@@ -622,3 +657,89 @@
next_page_token=None,
)
assert deposit.complete_date == discovery_date
+
+
+@pytest.mark.parametrize(
+ "swhid",
+ [
+ "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa
+ "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ ],
+)
+def test_deposit_metadata_unknown_swhid(
+ swhid, authenticated_client, deposit_collection, atom_dataset, swh_storage,
+):
+ """Posting a swhid reference is rejected if the referenced object is unknown
+
+ """
+ xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
+
+ response = post_atom(
+ authenticated_client,
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ data=xml_data,
+ )
+
+ assert (
+ response.status_code == status.HTTP_400_BAD_REQUEST
+ ), response.content.decode()
+ response_content = parse_xml(BytesIO(response.content))
+ assert "object does not exist" in response_content["sword:error"]["atom:summary"]
+
+
+@pytest.mark.parametrize(
+ "swhid",
+ [
+ "swh:1:ori:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:emd:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ ],
+)
+def test_deposit_metadata_extended_swhid(
+ swhid, authenticated_client, deposit_collection, atom_dataset, swh_storage,
+):
+ """Posting a swhid reference is rejected if the referenced SWHID is
+ for an extended object type
+
+ """
+ xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
+
+ response = post_atom(
+ authenticated_client,
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ data=xml_data,
+ )
+
+ assert (
+ response.status_code == status.HTTP_400_BAD_REQUEST
+ ), response.content.decode()
+ response_content = parse_xml(BytesIO(response.content))
+ assert "Invalid SWHID reference" in response_content["sword:error"]["atom:summary"]
+
+
+def test_deposit_metadata_unknown_origin(
+ authenticated_client, deposit_collection, atom_dataset, swh_storage,
+):
+ """Posting an origin reference is rejected if the origin is unknown
+
+ """
+ url = "https://gitlab.org/user/repo"
+ xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url)
+ response = post_atom(
+ authenticated_client,
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ data=xml_data,
+ )
+
+ assert (
+ response.status_code == status.HTTP_400_BAD_REQUEST
+ ), response.content.decode()
+ response_content = parse_xml(BytesIO(response.content))
+ assert "known to the archive" in response_content["sword:error"]["atom:summary"]
diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py
--- a/swh/deposit/tests/conftest.py
+++ b/swh/deposit/tests/conftest.py
@@ -88,6 +88,7 @@
"extraction_dir": "/tmp/swh-deposit/test/extraction-dir",
"checks": False,
"scheduler": {"cls": "local", **swh_scheduler_config,},
+ "storage": swh_storage_backend_config,
"storage_metadata": swh_storage_backend_config,
}

File Metadata

Mime Type
text/plain
Expires
Wed, Jul 2, 10:42 AM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226970

Event Timeline