Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9312092
D5231.id18778.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
D5231.id18778.diff
View Options
diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -27,7 +27,12 @@
from swh.deposit.models import Deposit
from swh.deposit.utils import compute_metadata_context
from swh.model import hashutil
-from swh.model.identifiers import ExtendedSWHID, QualifiedSWHID, ValidationError
+from swh.model.identifiers import (
+ ExtendedObjectType,
+ ExtendedSWHID,
+ QualifiedSWHID,
+ ValidationError,
+)
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
@@ -681,7 +686,8 @@
target_swhid = extended_swhid_from_qualified(swhid_reference)
- # store that metadata to the metadata storage
+ self._check_swhid_in_archive(target_swhid)
+
metadata_object = RawExtrinsicMetadata(
target=target_swhid, # core swhid or origin
discovery_date=deposit_request.date,
@@ -699,6 +705,45 @@
return (target_swhid, deposit, deposit_request)
+ def _check_swhid_in_archive(self, target_swhid: ExtendedSWHID) -> None:
+ """Check that the target object already exists in the archive,
+ and raise a BAD_REQUEST DepositError if it does not."""
+ if target_swhid.object_type in (ExtendedObjectType.CONTENT,):
+ if list(
+ self.storage.content_missing_per_sha1_git([target_swhid.object_id])
+ ):
+ raise DepositError(
+ BAD_REQUEST,
+ f"Cannot load metadata on {target_swhid}, this content "
+ f"object does not exist in the archive (yet?).",
+ )
+ elif target_swhid.object_type in (
+ ExtendedObjectType.DIRECTORY,
+ ExtendedObjectType.REVISION,
+ ExtendedObjectType.RELEASE,
+ ExtendedObjectType.SNAPSHOT,
+ ):
+ target_type_name = target_swhid.object_type.name.lower()
+ method = getattr(self.storage, target_type_name + "_missing")
+ if list(method([target_swhid.object_id])):
+ raise DepositError(
+ BAD_REQUEST,
+ f"Cannot load metadata on {target_swhid}, this {target_type_name} "
+ f"object does not exist in the archive (yet?).",
+ )
+ elif target_swhid.object_type in (ExtendedObjectType.ORIGIN,):
+ if None in list(self.storage.origin_get_by_sha1([target_swhid.object_id])):
+ raise DepositError(
+ BAD_REQUEST,
+ "Cannot load metadata on origin, it is not (yet?) known to the "
+ "archive.",
+ )
+ else:
+ raise DepositError(
+ BAD_REQUEST,
+ f"Cannot deposit metadata on {target_swhid.object_type.value}.",
+ )
+
def _atom_entry(
self,
request: Request,
diff --git a/swh/deposit/config.py b/swh/deposit/config.py
--- a/swh/deposit/config.py
+++ b/swh/deposit/config.py
@@ -104,6 +104,7 @@
"version": __version__,
"configuration": {"sword_version": "2"},
}
+ self.storage: StorageInterface = get_storage(**self.config["storage"])
self.storage_metadata: StorageInterface = get_storage(
**self.config["storage_metadata"]
)
diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py
--- a/swh/deposit/tests/api/test_collection_post_atom.py
+++ b/swh/deposit/tests/api/test_collection_post_atom.py
@@ -7,6 +7,7 @@
from io import BytesIO
import uuid
+import warnings
import attr
from django.urls import reverse_lazy as reverse
@@ -23,7 +24,14 @@
from swh.deposit.parsers import parse_xml
from swh.deposit.tests.common import post_atom
from swh.deposit.utils import compute_metadata_context, extended_swhid_from_qualified
-from swh.model.identifiers import QualifiedSWHID
+from swh.model.hypothesis_strategies import (
+ directories,
+ present_contents,
+ releases,
+ revisions,
+ snapshots,
+)
+from swh.model.identifiers import ObjectType, QualifiedSWHID
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
@@ -34,6 +42,30 @@
from swh.storage.interface import PagedResult
+def _insert_object(swh_storage, swhid):
+ """Insert an object with the given swhid in the archive"""
+ if swhid.object_type == ObjectType.CONTENT:
+ with warnings.catch_warnings():
+ # hypothesis doesn't like us using .example(), but we know what we're doing
+ warnings.simplefilter("ignore")
+ obj = present_contents().example()
+ swh_storage.content_add([attr.evolve(obj, sha1_git=swhid.object_id)])
+ else:
+ object_type_name = swhid.object_type.name.lower()
+ strategy = {
+ "directory": directories,
+ "revision": revisions,
+ "release": releases,
+ "snapshot": snapshots,
+ }[object_type_name]
+ method = getattr(swh_storage, object_type_name + "_add")
+ with warnings.catch_warnings():
+ # hypothesis doesn't like us using .example(), but we know what we're doing
+ warnings.simplefilter("ignore")
+ obj = strategy().example()
+ method([attr.evolve(obj, id=swhid.object_id)])
+
+
def test_post_deposit_atom_201_even_with_decimal(
authenticated_client, deposit_collection, atom_dataset
):
@@ -478,13 +510,15 @@
xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
deposit_client = authenticated_client.deposit_client
+ _insert_object(swh_storage, swhid_reference)
+
response = post_atom(
authenticated_client,
reverse(COL_IRI, args=[deposit_collection.name]),
data=xml_data,
)
- assert response.status_code == status.HTTP_201_CREATED
+ assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
response_content = parse_xml(BytesIO(response.content))
# Ensure the deposit is finalized
@@ -558,13 +592,14 @@
xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url)
origin_swhid = Origin(url).swhid()
deposit_client = authenticated_client.deposit_client
+ swh_storage.origin_add([Origin(url)])
response = post_atom(
authenticated_client,
reverse(COL_IRI, args=[deposit_collection.name]),
data=xml_data,
)
- assert response.status_code == status.HTTP_201_CREATED
+ assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
response_content = parse_xml(BytesIO(response.content))
# Ensure the deposit is finalized
deposit_id = int(response_content["swh:deposit_id"])
@@ -622,3 +657,89 @@
next_page_token=None,
)
assert deposit.complete_date == discovery_date
+
+
+@pytest.mark.parametrize(
+ "swhid",
+ [
+ "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa
+ "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ ],
+)
+def test_deposit_metadata_unknown_swhid(
+ swhid, authenticated_client, deposit_collection, atom_dataset, swh_storage,
+):
+ """Posting a swhid reference is rejected if the referenced object is unknown
+
+ """
+ xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
+
+ response = post_atom(
+ authenticated_client,
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ data=xml_data,
+ )
+
+ assert (
+ response.status_code == status.HTTP_400_BAD_REQUEST
+ ), response.content.decode()
+ response_content = parse_xml(BytesIO(response.content))
+ assert "object does not exist" in response_content["sword:error"]["atom:summary"]
+
+
+@pytest.mark.parametrize(
+ "swhid",
+ [
+ "swh:1:ori:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:emd:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ ],
+)
+def test_deposit_metadata_extended_swhid(
+ swhid, authenticated_client, deposit_collection, atom_dataset, swh_storage,
+):
+ """Posting a swhid reference is rejected if the referenced SWHID is
+ for an extended object type
+
+ """
+ xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
+
+ response = post_atom(
+ authenticated_client,
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ data=xml_data,
+ )
+
+ assert (
+ response.status_code == status.HTTP_400_BAD_REQUEST
+ ), response.content.decode()
+ response_content = parse_xml(BytesIO(response.content))
+ assert "Invalid SWHID reference" in response_content["sword:error"]["atom:summary"]
+
+
+def test_deposit_metadata_unknown_origin(
+ authenticated_client, deposit_collection, atom_dataset, swh_storage,
+):
+ """Posting an origin reference is rejected if the origin is unknown to the archive
+
+ """
+ url = "https://gitlab.org/user/repo"
+ xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url)
+ response = post_atom(
+ authenticated_client,
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ data=xml_data,
+ )
+
+ assert (
+ response.status_code == status.HTTP_400_BAD_REQUEST
+ ), response.content.decode()
+ response_content = parse_xml(BytesIO(response.content))
+ assert "known to the archive" in response_content["sword:error"]["atom:summary"]
diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py
--- a/swh/deposit/tests/conftest.py
+++ b/swh/deposit/tests/conftest.py
@@ -88,6 +88,7 @@
"extraction_dir": "/tmp/swh-deposit/test/extraction-dir",
"checks": False,
"scheduler": {"cls": "local", **swh_scheduler_config,},
+ "storage": swh_storage_backend_config,
"storage_metadata": swh_storage_backend_config,
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Jul 2, 10:42 AM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226970
Attached To
D5231: Check a SWHID exists in the archive before accepting a metadata-only deposit
Event Timeline
Log In to Comment