Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124817
D4475.id15947.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
29 KB
Subscribers
None
D4475.id15947.diff
View Options
diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -9,6 +9,7 @@
import json
from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union
+import attr
from django.http import FileResponse, HttpResponse
from django.shortcuts import render
from django.urls import reverse
@@ -19,7 +20,18 @@
from rest_framework.request import Request
from rest_framework.views import APIView
+from swh.deposit.api.checks import check_metadata
+from swh.deposit.api.converters import convert_status_detail
+from swh.deposit.models import Deposit
+from swh.deposit.utils import compute_metadata_context
from swh.model import hashutil
+from swh.model.identifiers import SWHID, ValidationError
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ RawExtrinsicMetadata,
+)
from swh.scheduler.utils import create_oneshot_task_dict
from ..config import (
@@ -52,8 +64,8 @@
make_error_response,
make_error_response_from_dict,
)
-from ..models import Deposit, DepositClient, DepositCollection, DepositRequest
-from ..parsers import parse_xml
+from ..models import DepositClient, DepositCollection, DepositRequest
+from ..parsers import parse_swh_reference, parse_xml
ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"]
ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"]
@@ -603,6 +615,104 @@
"status": deposit.status,
}
+ def _store_metadata_deposit(
+ self,
+ deposit: Deposit,
+ swhid_reference: Union[str, SWHID],
+ metadata: Dict,
+ raw_metadata: bytes,
+ with_deposit_origin: bool = False,
+ ) -> Union[
+ Dict, Tuple[Union[SWHID, str], Union[SWHID, str], Deposit, DepositRequest]
+ ]:
+ """When all user inputs pass the checks, this associates the raw_metadata to the
+ swhid_reference in the raw extrinsic metadata storage. In case of any issues,
+ a bad request response is returned to the user with the details.
+
+ Checks:
+ - The metadata received should pass the functional metadata checks.
+ - The SWHID is technically valid
+
+ Args:
+ deposit: Deposit reference
+ swhid_reference: The swhid or the origin to attach metadata information to
+ metadata: Full dict of metadata to check for validity (parsed out of
+ raw_metadata)
+ raw_metadata: The actual raw metadata to send in the storage metadata
+ with_deposit_origin: Flag to differentiate between metadata-only or metadata
+ update scenario.
+
+ Returns:
+ Error dict, or (core swhid/origin, reference, deposit, deposit request)
+
+ """
+ if not metadata:
+ return make_error_dict(
+ BAD_REQUEST,
+ "Empty body request is not supported",
+ "Atom entry deposit is supposed to send for metadata. "
+ "If the body is empty, there is no metadata.",
+ )
+
+ metadata_ok, error_details = check_metadata(metadata)
+ if not metadata_ok:
+ assert error_details, "Details should be set when a failure occurs"
+ return make_error_dict(
+ BAD_REQUEST,
+ "Functional metadata checks failure",
+ convert_status_detail(error_details),
+ )
+
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT_CLIENT,
+ url=deposit.client.provider_url,
+ metadata={"name": deposit.client.last_name},
+ )
+
+ metadata_fetcher = MetadataFetcher(
+ name=self.tool["name"],
+ version=self.tool["version"],
+ metadata=self.tool["configuration"],
+ )
+
+ # replace metadata within the deposit backend
+ deposit_request_data = {
+ METADATA_KEY: metadata,
+ RAW_METADATA_KEY: raw_metadata,
+ }
+
+ # actually add the metadata to the completed deposit
+ deposit_request = self._deposit_request_put(deposit, deposit_request_data)
+
+ object_type, metadata_context = compute_metadata_context(swhid_reference)
+ if with_deposit_origin: # metadata deposit update on completed deposit
+ metadata_context["origin"] = deposit.origin_url
+
+ swhid_core: Union[str, SWHID]
+ if isinstance(swhid_reference, str):
+ swhid_core = swhid_reference
+ else:
+ swhid_core = attr.evolve(swhid_reference, metadata={})
+
+ # store that metadata to the metadata storage
+ metadata_object = RawExtrinsicMetadata(
+ type=object_type,
+ target=swhid_core, # core swhid or origin
+ discovery_date=deposit_request.date,
+ authority=metadata_authority,
+ fetcher=metadata_fetcher,
+ format="sword-v2-atom-codemeta",
+ metadata=raw_metadata,
+ **metadata_context,
+ )
+
+ # write to metadata storage
+ self.storage_metadata.metadata_authority_add([metadata_authority])
+ self.storage_metadata.metadata_fetcher_add([metadata_fetcher])
+ self.storage_metadata.raw_extrinsic_metadata_add([metadata_object])
+
+ return (swhid_core, swhid_reference, deposit, deposit_request)
+
def _atom_entry(
self,
request: Request,
@@ -662,11 +772,13 @@
"If the body is empty, there is no metadata.",
)
- external_id = metadata.get("external_identifier", headers["slug"])
+ # Determine if we are in the metadata-only deposit case
+ try:
+ swhid = parse_swh_reference(metadata)
+ except ValidationError as e:
+ return make_error_dict(PARSING_ERROR, "Invalid SWHID reference", str(e),)
- # TODO: Determine if we are in the metadata-only deposit case. If it is, then
- # save deposit and deposit request typed 'metadata' and send metadata to the
- # metadata storage. Otherwise, do as existing deposit.
+ external_id = metadata.get("external_identifier", headers["slug"])
deposit = self._deposit_put(
request,
@@ -675,6 +787,29 @@
external_id=external_id,
)
+ if swhid is not None:
+ result = self._store_metadata_deposit(
+ deposit, swhid, metadata, raw_metadata
+ )
+ if isinstance(result, Dict): # error
+ return result
+ swhid_core, swhid_reference, deposit, deposit_request = result
+
+ deposit.status = DEPOSIT_STATUS_LOAD_SUCCESS
+ if isinstance(swhid_reference, SWHID):
+ deposit.swhid = str(swhid_core)
+ deposit.swhid_context = str(swhid_reference)
+ deposit.complete_date = deposit_request.date
+ deposit.reception_date = deposit_request.date
+ deposit.save()
+
+ return {
+ "deposit_id": deposit.id,
+ "deposit_date": deposit_request.date,
+ "status": deposit.status,
+ "archive": None,
+ }
+
self._deposit_request_put(
deposit,
{METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata},
diff --git a/swh/deposit/api/deposit_update.py b/swh/deposit/api/deposit_update.py
--- a/swh/deposit/api/deposit_update.py
+++ b/swh/deposit/api/deposit_update.py
@@ -8,28 +8,10 @@
from rest_framework import status
from rest_framework.request import Request
-from swh.deposit.api.checks import check_metadata
-from swh.deposit.api.converters import convert_status_detail
from swh.deposit.models import Deposit
from swh.model.identifiers import parse_swhid
-from swh.model.model import (
- MetadataAuthority,
- MetadataAuthorityType,
- MetadataFetcher,
- MetadataTargetType,
- RawExtrinsicMetadata,
-)
-from swh.storage import get_storage
-from swh.storage.interface import StorageInterface
-
-from ..config import (
- CONT_FILE_IRI,
- DEPOSIT_STATUS_LOAD_SUCCESS,
- EDIT_SE_IRI,
- EM_IRI,
- METADATA_KEY,
- RAW_METADATA_KEY,
-)
+
+from ..config import CONT_FILE_IRI, DEPOSIT_STATUS_LOAD_SUCCESS, EDIT_SE_IRI, EM_IRI
from ..errors import BAD_REQUEST, ParserError, make_error_dict
from ..parsers import (
SWHAtomEntryParser,
@@ -125,12 +107,6 @@
parser_classes = (SWHMultiPartParser, SWHAtomEntryParser)
- def __init__(self):
- super().__init__()
- self.storage_metadata: StorageInterface = get_storage(
- **self.config["storage_metadata"]
- )
-
def restrict_access(
self, request: Request, headers: Dict, deposit: Deposit
) -> Dict[str, Any]:
@@ -221,64 +197,20 @@
"Please ensure your metadata file is correctly formatted.",
)
- if not metadata:
- return make_error_dict(
- BAD_REQUEST,
- "Empty body request is not supported",
- "Atom entry deposit is supposed to send for metadata. "
- "If the body is empty, there is no metadata.",
- )
-
- metadata_ok, error_details = check_metadata(metadata)
- if not metadata_ok:
- assert error_details, "Details should be set when a failure occurs"
- return make_error_dict(
- BAD_REQUEST,
- "Functional metadata checks failure",
- convert_status_detail(error_details),
- )
-
- metadata_authority = MetadataAuthority(
- type=MetadataAuthorityType.DEPOSIT_CLIENT,
- url=deposit.client.provider_url,
- metadata={"name": deposit.client.last_name},
- )
-
- metadata_fetcher = MetadataFetcher(
- name=self.tool["name"],
- version=self.tool["version"],
- metadata=self.tool["configuration"],
- )
-
- deposit_swhid = parse_swhid(swhid)
-
- # replace metadata within the deposit backend
- deposit_request_data = {
- METADATA_KEY: metadata,
- RAW_METADATA_KEY: raw_metadata,
- }
-
- # actually add the metadata to the completed deposit
- deposit_request = self._deposit_request_put(deposit, deposit_request_data)
- # store that metadata to the metadata storage
- metadata_object = RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
- target=deposit_swhid,
- discovery_date=deposit_request.date,
- authority=metadata_authority,
- fetcher=metadata_fetcher,
- format="sword-v2-atom-codemeta",
- metadata=raw_metadata,
- origin=deposit.origin_url,
+ result = self._store_metadata_deposit(
+ deposit,
+ parse_swhid(swhid),
+ metadata,
+ raw_metadata,
+ with_deposit_origin=True,
)
+ if isinstance(result, Dict): # error
+ return result
- # write to metadata storage
- self.storage_metadata.metadata_authority_add([metadata_authority])
- self.storage_metadata.metadata_fetcher_add([metadata_fetcher])
- self.storage_metadata.raw_extrinsic_metadata_add([metadata_object])
+ _, _, deposit, deposit_request = result
return {
- "deposit_id": deposit_id,
+ "deposit_id": deposit.id,
"deposit_date": deposit_request.date,
"status": deposit.status,
"archive": None,
diff --git a/swh/deposit/config.py b/swh/deposit/config.py
--- a/swh/deposit/config.py
+++ b/swh/deposit/config.py
@@ -10,6 +10,8 @@
from swh.deposit import __version__
from swh.scheduler import get_scheduler
from swh.scheduler.interface import SchedulerInterface
+from swh.storage import get_storage
+from swh.storage.interface import StorageInterface
# IRIs (Internationalized Resource identifier) sword 2.0 specified
EDIT_SE_IRI = "edit_se_iri"
@@ -101,3 +103,6 @@
"version": __version__,
"configuration": {"sword_version": "2"},
}
+ self.storage_metadata: StorageInterface = get_storage(
+ **self.config["storage_metadata"]
+ )
diff --git a/swh/deposit/tests/api/test_deposit_metadata.py b/swh/deposit/tests/api/test_deposit_metadata.py
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/api/test_deposit_metadata.py
@@ -0,0 +1,256 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from io import BytesIO
+
+import attr
+from django.urls import reverse
+import pytest
+from rest_framework import status
+
+from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_LOAD_SUCCESS, APIConfig
+from swh.deposit.models import Deposit
+from swh.deposit.parsers import parse_xml
+from swh.deposit.utils import compute_metadata_context
+from swh.model.identifiers import SWHID, parse_swhid
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
+)
+from swh.storage.interface import PagedResult
+
+
+def test_deposit_metadata_invalid(
+ authenticated_client, deposit_collection, atom_dataset
+):
+ """Posting an invalid swhid reference returns a bad request error to clients
+
+ """
+ invalid_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
+ xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid)
+
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=xml_data,
+ HTTP_SLUG="external-id",
+ )
+ assert response.status_code == status.HTTP_400_BAD_REQUEST
+ assert b"Invalid SWHID reference" in response.content
+
+
+@pytest.mark.parametrize(
+ "swhid,target_type",
+ [
+ (
+ "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.CONTENT,
+ ),
+ (
+ "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.DIRECTORY,
+ ),
+ (
+ "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.REVISION,
+ ),
+ (
+ "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.RELEASE,
+ ),
+ (
+ "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.SNAPSHOT,
+ ),
+ (
+ "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.CONTENT,
+ ),
+ (
+ "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa
+ MetadataTargetType.DIRECTORY,
+ ),
+ (
+ "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.REVISION,
+ ),
+ (
+ "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.RELEASE,
+ ),
+ (
+ "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.SNAPSHOT,
+ ),
+ ],
+)
+def test_deposit_metadata_swhid(
+ swhid,
+ target_type,
+ authenticated_client,
+ deposit_collection,
+ atom_dataset,
+ swh_storage,
+):
+ """Posting a swhid reference is stored on raw extrinsic metadata storage
+
+ """
+ swhid_reference = parse_swhid(swhid)
+ swhid_core = attr.evolve(swhid_reference, metadata={})
+
+ xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
+ deposit_client = authenticated_client.deposit_client
+
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=xml_data,
+ HTTP_SLUG="external-id",
+ )
+
+ assert response.status_code == status.HTTP_201_CREATED
+ response_content = parse_xml(BytesIO(response.content))
+
+ # Ensure the deposit is finalized
+ deposit_id = int(response_content["deposit_id"])
+ deposit = Deposit.objects.get(pk=deposit_id)
+ assert isinstance(swhid_core, SWHID)
+ assert deposit.swhid == str(swhid_core)
+ assert deposit.swhid_context == str(swhid_reference)
+ assert deposit.complete_date == deposit.reception_date
+ assert deposit.complete_date is not None
+ assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
+
+ # Ensure metadata stored in the metadata storage is consistent
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT_CLIENT,
+ url=deposit_client.provider_url,
+ metadata={"name": deposit_client.last_name},
+ )
+
+ actual_authority = swh_storage.metadata_authority_get(
+ MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url
+ )
+ assert actual_authority == metadata_authority
+
+ config = APIConfig()
+ metadata_fetcher = MetadataFetcher(
+ name=config.tool["name"],
+ version=config.tool["version"],
+ metadata=config.tool["configuration"],
+ )
+
+ actual_fetcher = swh_storage.metadata_fetcher_get(
+ config.tool["name"], config.tool["version"]
+ )
+ assert actual_fetcher == metadata_fetcher
+
+ page_results = swh_storage.raw_extrinsic_metadata_get(
+ target_type, swhid_core, metadata_authority
+ )
+ discovery_date = page_results.results[0].discovery_date
+
+ assert len(page_results.results) == 1
+ assert page_results.next_page_token is None
+
+ object_type, metadata_context = compute_metadata_context(swhid_reference)
+ assert page_results == PagedResult(
+ results=[
+ RawExtrinsicMetadata(
+ type=object_type,
+ target=swhid_core,
+ discovery_date=discovery_date,
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="sword-v2-atom-codemeta",
+ metadata=xml_data.encode(),
+ **metadata_context,
+ )
+ ],
+ next_page_token=None,
+ )
+ assert deposit.complete_date == discovery_date
+
+
+@pytest.mark.parametrize(
+ "url", ["https://gitlab.org/user/repo", "https://whatever.else/repo",]
+)
+def test_deposit_metadata_origin(
+ url, authenticated_client, deposit_collection, atom_dataset, swh_storage,
+):
+ """Posting an origin reference is stored on raw extrinsic metadata storage
+
+ """
+ xml_data = atom_dataset["entry-data-with-origin"].format(url=url)
+ deposit_client = authenticated_client.deposit_client
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=xml_data,
+ HTTP_SLUG="external-id",
+ )
+
+ assert response.status_code == status.HTTP_201_CREATED
+ response_content = parse_xml(BytesIO(response.content))
+ # Ensure the deposit is finalized
+ deposit_id = int(response_content["deposit_id"])
+ deposit = Deposit.objects.get(pk=deposit_id)
+ # we got no swhid as input so we cannot have those
+ assert deposit.swhid is None
+ assert deposit.swhid_context is None
+ assert deposit.complete_date == deposit.reception_date
+ assert deposit.complete_date is not None
+ assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
+
+ # Ensure metadata stored in the metadata storage is consistent
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT_CLIENT,
+ url=deposit_client.provider_url,
+ metadata={"name": deposit_client.last_name},
+ )
+
+ actual_authority = swh_storage.metadata_authority_get(
+ MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url
+ )
+ assert actual_authority == metadata_authority
+
+ config = APIConfig()
+ metadata_fetcher = MetadataFetcher(
+ name=config.tool["name"],
+ version=config.tool["version"],
+ metadata=config.tool["configuration"],
+ )
+
+ actual_fetcher = swh_storage.metadata_fetcher_get(
+ config.tool["name"], config.tool["version"]
+ )
+ assert actual_fetcher == metadata_fetcher
+
+ page_results = swh_storage.raw_extrinsic_metadata_get(
+ MetadataTargetType.ORIGIN, url, metadata_authority
+ )
+ discovery_date = page_results.results[0].discovery_date
+
+ assert len(page_results.results) == 1
+ assert page_results.next_page_token is None
+
+ assert page_results == PagedResult(
+ results=[
+ RawExtrinsicMetadata(
+ type=MetadataTargetType.ORIGIN,
+ target=url,
+ discovery_date=discovery_date,
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="sword-v2-atom-codemeta",
+ metadata=xml_data.encode(),
+ )
+ ],
+ next_page_token=None,
+ )
+ assert deposit.complete_date == discovery_date
diff --git a/swh/deposit/tests/api/test_parsers.py b/swh/deposit/tests/api/test_parsers.py
--- a/swh/deposit/tests/api/test_parsers.py
+++ b/swh/deposit/tests/api/test_parsers.py
@@ -187,19 +187,8 @@
@pytest.fixture
-def xml_with_swhid():
- xml_data = """<?xml version="1.0"?>
- <entry xmlns="http://www.w3.org/2005/Atom"
- xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
- xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
- <swh:deposit>
- <swh:reference>
- <swh:object swhid="{swhid}" />
- </swh:reference>
- </swh:deposit>
- </entry>
- """
- return xml_data.strip()
+def xml_with_swhid(atom_dataset):
+ return atom_dataset["entry-data-with-swhid"]
@pytest.mark.parametrize(
diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py
--- a/swh/deposit/tests/conftest.py
+++ b/swh/deposit/tests/conftest.py
@@ -204,15 +204,19 @@
return APIClient() # <- drf's client
-@pytest.yield_fixture
+@pytest.fixture
def authenticated_client(client, deposit_user):
"""Returned a logged client
+ This also patches the client instance to keep a reference to the associated
+ deposit_user.
+
"""
_token = "%s:%s" % (deposit_user.username, TEST_USER["password"])
token = base64.b64encode(_token.encode("utf-8"))
authorization = "Basic %s" % token.decode("utf-8")
client.credentials(HTTP_AUTHORIZATION=authorization)
+ client.deposit_client = deposit_user
yield client
client.logout()
diff --git a/swh/deposit/tests/data/atom/entry-data-with-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-origin.xml
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/atom/entry-data-with-origin.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<entry xmlns="http://www.w3.org/2005/Atom"
+ xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
+ xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
+ <title>Awesome Compiler</title>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <author>dudess</author>
+ <swh:deposit>
+ <swh:reference>
+ <swh:origin url="{url}" />
+ </swh:reference>
+ </swh:deposit>
+</entry>
diff --git a/swh/deposit/tests/data/atom/entry-data-with-swhid.xml b/swh/deposit/tests/data/atom/entry-data-with-swhid.xml
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/atom/entry-data-with-swhid.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<entry xmlns="http://www.w3.org/2005/Atom"
+ xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
+ xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
+ <title>Awesome Compiler</title>
+ <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+ <author>dudess</author>
+ <swh:deposit>
+ <swh:reference>
+ <swh:object swhid="{swhid}" />
+ </swh:reference>
+ </swh:deposit>
+</entry>
diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py
--- a/swh/deposit/tests/test_utils.py
+++ b/swh/deposit/tests/test_utils.py
@@ -1,13 +1,16 @@
-# Copyright (C) 2018-2019 The Software Heritage developers
+# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Union
from unittest.mock import patch
import pytest
from swh.deposit import utils
+from swh.model.identifiers import SWHID, parse_swhid
+from swh.model.model import MetadataTargetType
def test_merge():
@@ -139,3 +142,59 @@
expected_date = "2017-01-01 00:00:00+00:00"
assert str(actual_date) == expected_date
+
+
+@pytest.mark.parametrize(
+ "swhid_or_origin,expected_type,expected_metadata_context",
+ [
+ ("https://something", MetadataTargetType.ORIGIN, {"origin": None}),
+ (
+ "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.CONTENT,
+ {"origin": None},
+ ),
+ (
+ "swh:1:snp:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=http://blah",
+ MetadataTargetType.SNAPSHOT,
+ {"origin": "http://blah", "path": None},
+ ),
+ (
+ "swh:1:dir:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;path=/path",
+ MetadataTargetType.DIRECTORY,
+ {"origin": None, "path": b"/path"},
+ ),
+ (
+ "swh:1:rev:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;visit=swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49", # noqa
+ MetadataTargetType.REVISION,
+ {
+ "origin": None,
+ "path": None,
+ "snapshot": parse_swhid(
+ "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49"
+ ),
+ },
+ ),
+ (
+ "swh:1:rel:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:dir:41b5c8cc985d190b5a7ef4878128ebfdc2358f49", # noqa
+ MetadataTargetType.RELEASE,
+ {
+ "origin": None,
+ "path": None,
+ "directory": parse_swhid(
+ "swh:1:dir:41b5c8cc985d190b5a7ef4878128ebfdc2358f49"
+ ),
+ },
+ ),
+ ],
+)
+def test_compute_metadata_context(
+ swhid_or_origin: Union[str, SWHID], expected_type, expected_metadata_context
+):
+ if expected_type != MetadataTargetType.ORIGIN:
+ assert isinstance(swhid_or_origin, str)
+ swhid_or_origin = parse_swhid(swhid_or_origin)
+
+ object_type, metadata_context = utils.compute_metadata_context(swhid_or_origin)
+
+ assert object_type == expected_type
+ assert metadata_context == expected_metadata_context
diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py
--- a/swh/deposit/utils.py
+++ b/swh/deposit/utils.py
@@ -1,13 +1,15 @@
-# Copyright (C) 2018-2019 The Software Heritage developers
+# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from types import GeneratorType
+from typing import Any, Dict, Tuple, Union
import iso8601
-from swh.model.identifiers import normalize_timestamp
+from swh.model.identifiers import SWHID, normalize_timestamp, parse_swhid
+from swh.model.model import MetadataTargetType
def merge(*dicts):
@@ -81,3 +83,37 @@
date = iso8601.parse_date(date)
return normalize_timestamp(date)
+
+
+def compute_metadata_context(
+ swhid_reference: Union[SWHID, str]
+) -> Tuple[MetadataTargetType, Dict[str, Any]]:
+ """Given a SWHID object or an origin, determine the target type and context.
+
+ The parse_swhid calls within are not expected to raise (because they should have
+ been caught early on).
+
+ """
+ metadata_context: Dict[str, Any] = {"origin": None}
+ if isinstance(swhid_reference, SWHID):
+ object_type = MetadataTargetType(swhid_reference.object_type)
+ assert object_type != MetadataTargetType.ORIGIN
+
+ if swhid_reference.metadata:
+ path = swhid_reference.metadata.get("path")
+ metadata_context = {
+ "origin": swhid_reference.metadata.get("origin"),
+ "path": path.encode() if path else None,
+ }
+ snapshot = swhid_reference.metadata.get("visit")
+ if snapshot:
+ metadata_context["snapshot"] = parse_swhid(snapshot)
+
+ anchor = swhid_reference.metadata.get("anchor")
+ if anchor:
+ anchor_swhid = parse_swhid(anchor)
+ metadata_context[anchor_swhid.object_type] = anchor_swhid
+ else:
+ object_type = MetadataTargetType.ORIGIN
+
+ return object_type, metadata_context
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 8:30 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3229150
Attached To
D4475: Adapt existing POST to a collection to allow metadata-only deposit
Event Timeline
Log In to Comment