diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py --- a/swh/indexer/codemeta.py +++ b/swh/indexer/codemeta.py @@ -9,6 +9,7 @@ import json import os.path import re +from typing import Any, List from pyld import jsonld @@ -121,11 +122,18 @@ raise Exception(url) -def compact(doc): - """Same as `pyld.jsonld.compact`, but in the context of CodeMeta.""" - return jsonld.compact( - doc, CODEMETA_CONTEXT_URL, options={"documentLoader": _document_loader} - ) +def compact(doc, forgefed: bool): + """Same as `pyld.jsonld.compact`, but in the context of CodeMeta. + + Args: + forgefed: Whether to add ForgeFed and ActivityStreams as compact URIs. + This is typically used for extrinsic metadata documents, which frequently + use properties from these namespaces. + """ + contexts: List[Any] = [CODEMETA_CONTEXT_URL] + if forgefed: + contexts.append({"as": ACTIVITYSTREAMS_URI, "forge": FORGEFED_URI}) + return jsonld.compact(doc, contexts, options={"documentLoader": _document_loader}) def expand(doc): @@ -203,4 +211,7 @@ elif value not in merged_document[key]: merged_document[key].append(value) - return compact(merged_document) + # XXX: we should set forgefed=True when merging extrinsic-metadata documents. + # however, this function is only used to merge multiple files of the same + # directory (which is only for intrinsic-metadata), so it is not an issue for now + return compact(merged_document, forgefed=False) diff --git a/swh/indexer/metadata_dictionary/base.py b/swh/indexer/metadata_dictionary/base.py --- a/swh/indexer/metadata_dictionary/base.py +++ b/swh/indexer/metadata_dictionary/base.py @@ -62,7 +62,7 @@ raise NotImplementedError(f"{self.__class__.__name__}.translate") def normalize_translation(self, metadata: Dict[str, Any]) -> Dict[str, Any]: - return compact(metadata) + raise NotImplementedError(f"{self.__class__.__name__}.normalize_translation") class BaseExtrinsicMapping(BaseMapping): @@ -82,6 +82,9 @@ """ raise NotImplementedError(f"{cls.__name__}.extrinsic_metadata_formats") + def normalize_translation(self, metadata: Dict[str, Any]) -> Dict[str, Any]: + return compact(metadata, forgefed=True) + class BaseIntrinsicMapping(BaseMapping): """Base class for intrinsic-metadata mappings to inherit from @@ -99,6 +102,9 @@ """ raise NotImplementedError(f"{cls.__name__}.detect_metadata_files") + def normalize_translation(self, metadata: Dict[str, Any]) -> Dict[str, Any]: + return compact(metadata, forgefed=False) + class SingleFileIntrinsicMapping(BaseIntrinsicMapping): """Base class for all intrinsic metadata mappings that use a single file as input.""" diff --git a/swh/indexer/tests/metadata_dictionary/test_github.py b/swh/indexer/tests/metadata_dictionary/test_github.py --- a/swh/indexer/tests/metadata_dictionary/test_github.py +++ b/swh/indexer/tests/metadata_dictionary/test_github.py @@ -5,6 +5,14 @@ from swh.indexer.metadata_dictionary import MAPPINGS +CONTEXT = [ + "https://doi.org/10.5063/schema/codemeta-2.0", + { + "as": "https://www.w3.org/ns/activitystreams#", + "forge": "https://forgefed.org/ns#", + }, +] + def test_compute_metadata_none(): """ @@ -111,11 +119,11 @@ """ result = MAPPINGS["GitHubMapping"]().translate(content) assert result == { - "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "@context": CONTEXT, "type": "https://forgefed.org/ns#Repository", - "https://forgefed.org/ns#forks": { - "https://www.w3.org/ns/activitystreams#totalItems": 1, - "type": "https://www.w3.org/ns/activitystreams#OrderedCollection", + "forge:forks": { + "as:totalItems": 1, + "type": "as:OrderedCollection", }, "license": "https://spdx.org/licenses/GPL-3.0", "name": "SoftwareHeritage/swh-indexer",