def _store_metadata_deposit(
    self,
    deposit: Deposit,
    swhid_reference: Union[str, SWHID],
    metadata: Dict,
    raw_metadata: bytes,
    with_deposit_origin: bool = False,
) -> Dict:
    """Validate a metadata deposit, record it and push it to the metadata storage.

    The parsed metadata is validated first; on failure an error dict (built with
    make_error_dict) is returned and nothing is written. Otherwise a deposit
    request holding the metadata is recorded, the raw metadata is written to
    ``self.storage_metadata`` as a RawExtrinsicMetadata object targeting the
    reference, and the deposit row is marked as successfully loaded when it was
    not already finalized.

    Args:
        deposit: Deposit the metadata belongs to
        swhid_reference: SWHID object, or origin url (str), the metadata is
            attached to
        metadata: Parsed metadata (out of raw_metadata), used for the validity
            checks and stored in the deposit request
        raw_metadata: Raw metadata bytes, sent as is to the metadata storage
        with_deposit_origin: When True, the deposit's own origin url is used as
            the "origin" entry of the metadata context (metadata update on an
            already completed deposit)

    Returns:
        Either an error dict (empty metadata or failed functional checks) or a
        dict with keys "deposit_id", "deposit_date", "status" and "archive"

    """
    # Guard clauses: refuse invalid input before touching any backend
    if not metadata:
        return make_error_dict(
            BAD_REQUEST,
            "Empty body request is not supported",
            "Atom entry deposit is supposed to send for metadata. "
            "If the body is empty, there is no metadata.",
        )

    is_valid, failure_details = check_metadata(metadata)
    if not is_valid:
        assert failure_details, "Details should be set when a failure occurs"
        return make_error_dict(
            BAD_REQUEST,
            "Functional metadata checks failure",
            convert_status_detail(failure_details),
        )

    client = deposit.client
    authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=client.provider_url,
        metadata={"name": client.last_name},
    )

    tool = self.tool
    fetcher = MetadataFetcher(
        name=tool["name"], version=tool["version"], metadata=tool["configuration"],
    )

    # Record the metadata (parsed and raw) as a request on the deposit; its date
    # is reused below as discovery date and completion date
    recorded_request = self._deposit_request_put(
        deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata},
    )
    recorded_at = recorded_request.date

    object_type, metadata_context = compute_metadata_context(swhid_reference)
    if with_deposit_origin:
        # metadata update on a completed deposit: the deposit's origin wins
        metadata_context["origin"] = deposit.origin_url

    # The storage target is either the origin url or the swhid stripped from its
    # qualifiers (which live in the metadata context instead)
    is_swhid = isinstance(swhid_reference, SWHID)
    swhid_core: Union[str, SWHID] = (
        attr.evolve(swhid_reference, metadata={})
        if is_swhid and swhid_reference.metadata
        else swhid_reference
    )

    raw_entry = RawExtrinsicMetadata(
        type=object_type,
        target=swhid_core,
        discovery_date=recorded_at,
        authority=authority,
        fetcher=fetcher,
        format="sword-v2-atom-codemeta",
        metadata=raw_metadata,
        **metadata_context,
    )

    # Authority and fetcher are written before the entry referencing them
    storage = self.storage_metadata
    storage.metadata_authority_add([authority])
    storage.metadata_fetcher_add([fetcher])
    storage.raw_extrinsic_metadata_add([raw_entry])

    # Metadata-only deposit: nothing will be loaded later on, so the deposit is
    # finalized right away (unless it already carries a final state)
    already_finalized = (
        deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
        or deposit.swhid
        or deposit.swhid_context
    )
    if not already_finalized:
        deposit.status = DEPOSIT_STATUS_LOAD_SUCCESS
        if is_swhid:
            deposit.swhid = str(swhid_core)
            deposit.swhid_context = str(swhid_reference)
        deposit.complete_date = recorded_at
        deposit.reception_date = recorded_at
        deposit.save()

    return {
        "deposit_id": deposit.id,
        "deposit_date": recorded_at,
        "status": deposit.status,
        "archive": None,
    }
+ external_id = metadata.get("external_identifier", headers["slug"]) deposit = self._deposit_put( request, @@ -675,6 +805,9 @@ external_id=external_id, ) + if swhid is not None: + return self._store_metadata_deposit(deposit, swhid, metadata, raw_metadata) + self._deposit_request_put( deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, diff --git a/swh/deposit/api/deposit_update.py b/swh/deposit/api/deposit_update.py --- a/swh/deposit/api/deposit_update.py +++ b/swh/deposit/api/deposit_update.py @@ -8,28 +8,10 @@ from rest_framework import status from rest_framework.request import Request -from swh.deposit.api.checks import check_metadata -from swh.deposit.api.converters import convert_status_detail from swh.deposit.models import Deposit from swh.model.identifiers import parse_swhid -from swh.model.model import ( - MetadataAuthority, - MetadataAuthorityType, - MetadataFetcher, - MetadataTargetType, - RawExtrinsicMetadata, -) -from swh.storage import get_storage -from swh.storage.interface import StorageInterface - -from ..config import ( - CONT_FILE_IRI, - DEPOSIT_STATUS_LOAD_SUCCESS, - EDIT_SE_IRI, - EM_IRI, - METADATA_KEY, - RAW_METADATA_KEY, -) + +from ..config import CONT_FILE_IRI, DEPOSIT_STATUS_LOAD_SUCCESS, EDIT_SE_IRI, EM_IRI from ..errors import BAD_REQUEST, ParserError, make_error_dict from ..parsers import ( SWHAtomEntryParser, @@ -125,12 +107,6 @@ parser_classes = (SWHMultiPartParser, SWHAtomEntryParser) - def __init__(self): - super().__init__() - self.storage_metadata: StorageInterface = get_storage( - **self.config["storage_metadata"] - ) - def restrict_access( self, request: Request, headers: Dict, deposit: Deposit ) -> Dict[str, Any]: @@ -221,69 +197,14 @@ "Please ensure your metadata file is correctly formatted.", ) - if not metadata: - return make_error_dict( - BAD_REQUEST, - "Empty body request is not supported", - "Atom entry deposit is supposed to send for metadata. 
" - "If the body is empty, there is no metadata.", - ) - - metadata_ok, error_details = check_metadata(metadata) - if not metadata_ok: - assert error_details, "Details should be set when a failure occurs" - return make_error_dict( - BAD_REQUEST, - "Functional metadata checks failure", - convert_status_detail(error_details), - ) - - metadata_authority = MetadataAuthority( - type=MetadataAuthorityType.DEPOSIT_CLIENT, - url=deposit.client.provider_url, - metadata={"name": deposit.client.last_name}, + return self._store_metadata_deposit( + deposit, + parse_swhid(swhid), + metadata, + raw_metadata, + with_deposit_origin=True, ) - metadata_fetcher = MetadataFetcher( - name=self.tool["name"], - version=self.tool["version"], - metadata=self.tool["configuration"], - ) - - deposit_swhid = parse_swhid(swhid) - - # replace metadata within the deposit backend - deposit_request_data = { - METADATA_KEY: metadata, - RAW_METADATA_KEY: raw_metadata, - } - - # actually add the metadata to the completed deposit - deposit_request = self._deposit_request_put(deposit, deposit_request_data) - # store that metadata to the metadata storage - metadata_object = RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, - target=deposit_swhid, - discovery_date=deposit_request.date, - authority=metadata_authority, - fetcher=metadata_fetcher, - format="sword-v2-atom-codemeta", - metadata=raw_metadata, - origin=deposit.origin_url, - ) - - # write to metadata storage - self.storage_metadata.metadata_authority_add([metadata_authority]) - self.storage_metadata.metadata_fetcher_add([metadata_fetcher]) - self.storage_metadata.raw_extrinsic_metadata_add([metadata_object]) - - return { - "deposit_id": deposit_id, - "deposit_date": deposit_request.date, - "status": deposit.status, - "archive": None, - } - def process_post( self, request, diff --git a/swh/deposit/config.py b/swh/deposit/config.py --- a/swh/deposit/config.py +++ b/swh/deposit/config.py @@ -10,6 +10,8 @@ from swh.deposit import __version__ 
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from io import BytesIO

import attr
from django.urls import reverse
import pytest
from rest_framework import status

from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_LOAD_SUCCESS, APIConfig
from swh.deposit.models import Deposit
from swh.deposit.parsers import parse_xml
from swh.deposit.utils import compute_metadata_context
from swh.model.identifiers import SWHID, parse_swhid
from swh.model.model import (
    MetadataAuthority,
    MetadataAuthorityType,
    MetadataFetcher,
    MetadataTargetType,
    RawExtrinsicMetadata,
)
from swh.storage.interface import PagedResult


def _check_authority_and_fetcher(swh_storage, deposit_client):
    """Assert the authority and fetcher of a metadata deposit reached the storage.

    Returns:
        The expected (MetadataAuthority, MetadataFetcher) pair, so callers can
        reuse them to query and compare the raw extrinsic metadata.

    """
    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT,
        url=deposit_client.provider_url,
        metadata={"name": deposit_client.last_name},
    )

    actual_authority = swh_storage.metadata_authority_get(
        MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url
    )
    assert actual_authority == metadata_authority

    config = APIConfig()
    metadata_fetcher = MetadataFetcher(
        name=config.tool["name"],
        version=config.tool["version"],
        metadata=config.tool["configuration"],
    )

    actual_fetcher = swh_storage.metadata_fetcher_get(
        config.tool["name"], config.tool["version"]
    )
    assert actual_fetcher == metadata_fetcher

    return metadata_authority, metadata_fetcher


def test_deposit_metadata_invalid(
    authenticated_client, deposit_collection, atom_dataset
):
    """Posting an invalid swhid reference is propagated to clients

    """
    # the space after "dir" is what makes this swhid invalid
    invalid_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
    xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid)

    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=xml_data,
        HTTP_SLUG="external-id",
    )
    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Invalid SWHID reference" in response.content


@pytest.mark.parametrize(
    "swhid,target_type",
    [
        (
            "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
            MetadataTargetType.CONTENT,
        ),
        (
            "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
            MetadataTargetType.DIRECTORY,
        ),
        (
            "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
            MetadataTargetType.REVISION,
        ),
        (
            "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
            MetadataTargetType.RELEASE,
        ),
        (
            "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
            MetadataTargetType.SNAPSHOT,
        ),
        (
            "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
            MetadataTargetType.CONTENT,
        ),
        (
            "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/",  # noqa
            MetadataTargetType.DIRECTORY,
        ),
        (
            "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
            MetadataTargetType.REVISION,
        ),
        (
            "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
            MetadataTargetType.RELEASE,
        ),
        (
            "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
            MetadataTargetType.SNAPSHOT,
        ),
    ],
)
def test_deposit_metadata_swhid(
    swhid,
    target_type,
    authenticated_client,
    deposit_collection,
    atom_dataset,
    swh_storage,
):
    """Posting a swhid reference is stored on raw extrinsic metadata storage

    """
    swhid_reference = parse_swhid(swhid)
    # the storage target is the swhid without its qualifiers
    swhid_core = attr.evolve(swhid_reference, metadata={})

    xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
    deposit_client = authenticated_client.deposit_client

    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=xml_data,
        HTTP_SLUG="external-id",
    )

    assert response.status_code == status.HTTP_201_CREATED
    response_content = parse_xml(BytesIO(response.content))

    # Ensure the deposit is finalized
    deposit_id = int(response_content["deposit_id"])
    deposit = Deposit.objects.get(pk=deposit_id)
    assert isinstance(swhid_core, SWHID)
    assert deposit.swhid == str(swhid_core)
    assert deposit.swhid_context == str(swhid_reference)
    assert deposit.complete_date == deposit.reception_date
    assert deposit.complete_date is not None
    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS

    # Ensure metadata stored in the metadata storage is consistent
    metadata_authority, metadata_fetcher = _check_authority_and_fetcher(
        swh_storage, deposit_client
    )

    page_results = swh_storage.raw_extrinsic_metadata_get(
        target_type, swhid_core, metadata_authority
    )
    # check the result count first so an empty page fails on a clear assertion
    # rather than on an IndexError
    assert len(page_results.results) == 1
    assert page_results.next_page_token is None
    discovery_date = page_results.results[0].discovery_date

    object_type, metadata_context = compute_metadata_context(swhid_reference)
    assert page_results == PagedResult(
        results=[
            RawExtrinsicMetadata(
                type=object_type,
                target=swhid_core,
                discovery_date=discovery_date,
                authority=attr.evolve(metadata_authority, metadata=None),
                fetcher=attr.evolve(metadata_fetcher, metadata=None),
                format="sword-v2-atom-codemeta",
                metadata=xml_data.encode(),
                **metadata_context,
            )
        ],
        next_page_token=None,
    )
    assert deposit.complete_date == discovery_date


@pytest.mark.parametrize(
    "url", ["https://gitlab.org/user/repo", "https://whatever.else/repo",]
)
def test_deposit_metadata_origin(
    url, authenticated_client, deposit_collection, atom_dataset, swh_storage,
):
    """Posting an origin reference is stored on raw extrinsic metadata storage

    """
    xml_data = atom_dataset["entry-data-with-origin"].format(url=url)
    deposit_client = authenticated_client.deposit_client
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type="application/atom+xml;type=entry",
        data=xml_data,
        HTTP_SLUG="external-id",
    )

    assert response.status_code == status.HTTP_201_CREATED
    response_content = parse_xml(BytesIO(response.content))
    # Ensure the deposit is finalized
    deposit_id = int(response_content["deposit_id"])
    deposit = Deposit.objects.get(pk=deposit_id)
    # we got no swhid as input so we cannot have those
    assert deposit.swhid is None
    assert deposit.swhid_context is None
    assert deposit.complete_date == deposit.reception_date
    assert deposit.complete_date is not None
    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS

    # Ensure metadata stored in the metadata storage is consistent
    metadata_authority, metadata_fetcher = _check_authority_and_fetcher(
        swh_storage, deposit_client
    )

    page_results = swh_storage.raw_extrinsic_metadata_get(
        MetadataTargetType.ORIGIN, url, metadata_authority
    )
    # check the result count first so an empty page fails on a clear assertion
    # rather than on an IndexError
    assert len(page_results.results) == 1
    assert page_results.next_page_token is None
    discovery_date = page_results.results[0].discovery_date

    assert page_results == PagedResult(
        results=[
            RawExtrinsicMetadata(
                type=MetadataTargetType.ORIGIN,
                target=url,
                discovery_date=discovery_date,
                authority=attr.evolve(metadata_authority, metadata=None),
                fetcher=attr.evolve(metadata_fetcher, metadata=None),
                format="sword-v2-atom-codemeta",
                metadata=xml_data.encode(),
            )
        ],
        next_page_token=None,
    )
    assert deposit.complete_date == discovery_date
@pytest.mark.parametrize(
    ("swhid_or_origin", "expected_type", "expected_metadata_context"),
    [
        ("https://something", MetadataTargetType.ORIGIN, {"origin": None}),
        (
            "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49",
            MetadataTargetType.CONTENT,
            {"origin": None},
        ),
        (
            "swh:1:snp:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=http://blah",
            MetadataTargetType.SNAPSHOT,
            {"origin": "http://blah", "path": None},
        ),
        (
            "swh:1:dir:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;path=/path",
            MetadataTargetType.DIRECTORY,
            {"origin": None, "path": b"/path"},
        ),
        (
            "swh:1:rev:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;visit=swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",  # noqa
            MetadataTargetType.REVISION,
            {
                "origin": None,
                "path": None,
                "snapshot": parse_swhid(
                    "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49"
                ),
            },
        ),
        (
            "swh:1:rel:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:dir:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",  # noqa
            MetadataTargetType.RELEASE,
            {
                "origin": None,
                "path": None,
                "directory": parse_swhid(
                    "swh:1:dir:41b5c8cc985d190b5a7ef4878128ebfdc2358f49"
                ),
            },
        ),
    ],
)
def test_compute_metadata_context(
    swhid_or_origin: Union[str, SWHID], expected_type, expected_metadata_context
):
    """The target type and context are derived from an origin url or a swhid

    """
    reference: Union[str, SWHID]
    if expected_type == MetadataTargetType.ORIGIN:
        # origin urls are passed through as plain strings
        reference = swhid_or_origin
    else:
        # every other case is given as a swhid string and must be parsed first
        assert isinstance(swhid_or_origin, str)
        reference = parse_swhid(swhid_or_origin)

    actual_type, actual_context = utils.compute_metadata_context(reference)

    assert actual_type == expected_type
    assert actual_context == expected_metadata_context
def compute_metadata_context(
    swhid_reference: Union[SWHID, str]
) -> Tuple[MetadataTargetType, Dict[str, Any]]:
    """Determine the metadata target type and context of a reference.

    Anything which is not a SWHID instance (i.e. an origin url) is typed as an
    origin, with a context reduced to ``{"origin": None}``. The same minimal
    context is returned for a SWHID without qualifiers.

    For a SWHID with qualifiers, the context holds:

    - "origin": the "origin" qualifier (None when absent)
    - "path": the "path" qualifier encoded as bytes (None when absent or empty)
    - "snapshot": the parsed "visit" qualifier, only when present
    - one entry keyed by the object type of the parsed "anchor" qualifier, only
      when present

    The parse_swhid calls within are not expected to raise (because they should
    have been caught early on).

    Returns:
        Tuple (target type, context dict)

    """
    if not isinstance(swhid_reference, SWHID):
        return MetadataTargetType.ORIGIN, {"origin": None}

    target_type = MetadataTargetType(swhid_reference.object_type)
    assert target_type != MetadataTargetType.ORIGIN

    qualifiers = swhid_reference.metadata
    if not qualifiers:
        return target_type, {"origin": None}

    raw_path = qualifiers.get("path")
    context: Dict[str, Any] = {
        "origin": qualifiers.get("origin"),
        "path": raw_path.encode() if raw_path else None,
    }

    visit = qualifiers.get("visit")
    if visit:
        context["snapshot"] = parse_swhid(visit)

    anchor = qualifiers.get("anchor")
    if anchor:
        parsed_anchor = parse_swhid(anchor)
        # the anchor is stored under its own object type name
        context[parsed_anchor.object_type] = parsed_anchor

    return target_type, context