diff --git a/swh/indexer/codemeta.py b/swh/indexer/codemeta.py
--- a/swh/indexer/codemeta.py
+++ b/swh/indexer/codemeta.py
@@ -30,6 +30,8 @@
 }
 CODEMETA_URI = "https://codemeta.github.io/terms/"
 SCHEMA_URI = "http://schema.org/"
+FORGEFED_URI = "https://forgefed.org/ns#"
+ACTIVITYSTREAMS_URI = "https://www.w3.org/ns/activitystreams#"
 
 
 PROPERTY_BLACKLIST = {
diff --git a/swh/indexer/metadata_dictionary/base.py b/swh/indexer/metadata_dictionary/base.py
--- a/swh/indexer/metadata_dictionary/base.py
+++ b/swh/indexer/metadata_dictionary/base.py
@@ -5,7 +5,7 @@
 
 import json
 import logging
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar
 
 from typing_extensions import TypedDict
 
@@ -20,6 +20,26 @@
     type: str
 
 
+TTranslateCallable = TypeVar(
+    "TTranslateCallable", bound=Callable[[Any, Dict[str, Any], Any], None]
+)
+
+
+def mark_productions(
+    namespace: str, terms: List[str]
+) -> Callable[[TTranslateCallable], TTranslateCallable]:
+    """Returns a decorator that marks the decorated function as adding
+    the given terms to the ``translated_metadata`` dict"""
+
+    def decorator(f: TTranslateCallable) -> TTranslateCallable:
+        if not hasattr(f, "produced_terms"):
+            f.produced_terms = []  # type: ignore
+        f.produced_terms.extend(namespace + term for term in terms)  # type: ignore
+        return f
+
+    return decorator
+
+
 class BaseMapping:
     """Base class for mappings to inherit from
 
@@ -103,14 +123,24 @@
 
     @classmethod
     def supported_terms(cls):
-        return {
+        # one-to-one mapping from the original key to a CodeMeta term
+        simple_terms = {
             term
             for (key, term) in cls.mapping.items()
             if key in cls.string_fields
             or hasattr(cls, "normalize_" + cls._normalize_method_name(key))
-            or hasattr(cls, "translate_" + cls._normalize_method_name(key))
         }
 
+        # more complex mapping from the original key to JSON-LD
+        complex_terms = {
+            term
+            for meth_name in dir(cls)
+            if meth_name.startswith("translate_")
+            for term in getattr(getattr(cls, meth_name), "produced_terms", [])
+        }
+
+        return simple_terms | complex_terms
+
     def _translate_dict(
         self, content_dict: Dict, *, normalize: bool = True
     ) -> Dict[str, str]:
diff --git a/swh/indexer/metadata_dictionary/github.py b/swh/indexer/metadata_dictionary/github.py
--- a/swh/indexer/metadata_dictionary/github.py
+++ b/swh/indexer/metadata_dictionary/github.py
@@ -3,12 +3,12 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 import json
-from typing import List, Tuple
+from typing import Any, Dict, List, Tuple
 
-from swh.indexer.codemeta import SCHEMA_URI
+from swh.indexer.codemeta import ACTIVITYSTREAMS_URI, FORGEFED_URI, SCHEMA_URI
 from swh.indexer.storage.interface import Sha1
 
-from .base import DirectoryLsEntry, JsonMapping
+from .base import DirectoryLsEntry, JsonMapping, mark_productions
 
 
 def _prettyprint(d):
@@ -31,6 +31,38 @@
     def extrinsic_metadata_formats(cls) -> Tuple[str, ...]:
         return ("application/vnd.github.v3+json",)
 
+    def _translate_dict(self, content_dict: Dict[str, Any], **kwargs) -> Dict[str, Any]:
+        d = super()._translate_dict(content_dict, **kwargs)
+        d["type"] = FORGEFED_URI + "Repository"
+        return d
+
+    @mark_productions(FORGEFED_URI, ["forks"])
+    @mark_productions(ACTIVITYSTREAMS_URI, ["totalItems"])
+    def translate_forks_count(
+        self, translated_metadata: Dict[str, Any], v: Any
+    ) -> None:
+        """Appends the fork count to ``forgefed:forks`` as an OrderedCollection.
+
+        >>> translated_metadata = {}
+        >>> GitHubMapping().translate_forks_count(translated_metadata, 42)
+        >>> _prettyprint(translated_metadata)
+        {
+            "https://forgefed.org/ns#forks": [
+                {
+                    "@type": "https://www.w3.org/ns/activitystreams#OrderedCollection",
+                    "https://www.w3.org/ns/activitystreams#totalItems": 42
+                }
+            ]
+        }
+        """
+        if isinstance(v, int) and not isinstance(v, bool):  # bool is an int subclass
+            translated_metadata.setdefault(FORGEFED_URI + "forks", []).append(
+                {
+                    "@type": ACTIVITYSTREAMS_URI + "OrderedCollection",
+                    ACTIVITYSTREAMS_URI + "totalItems": v,
+                }
+            )
+
     def normalize_license(self, d):
         """
 
diff --git a/swh/indexer/tests/metadata_dictionary/test_github.py b/swh/indexer/tests/metadata_dictionary/test_github.py
--- a/swh/indexer/tests/metadata_dictionary/test_github.py
+++ b/swh/indexer/tests/metadata_dictionary/test_github.py
@@ -21,7 +21,12 @@
 
 def test_supported_terms():
     terms = MAPPINGS["GitHubMapping"].supported_terms()
-    assert {"http://schema.org/name", "http://schema.org/license"} <= terms
+    assert {
+        "http://schema.org/name",
+        "http://schema.org/license",
+        "https://forgefed.org/ns#forks",
+        "https://www.w3.org/ns/activitystreams#totalItems",
+    } <= terms
 
 
 def test_compute_metadata_github():
@@ -107,7 +112,11 @@
     result = MAPPINGS["GitHubMapping"]().translate(content)
     assert result == {
         "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
-        "type": "SoftwareSourceCode",
+        "type": "https://forgefed.org/ns#Repository",
+        "https://forgefed.org/ns#forks": {
+            "https://www.w3.org/ns/activitystreams#totalItems": 1,
+            "type": "https://www.w3.org/ns/activitystreams#OrderedCollection",
+        },
         "license": "https://spdx.org/licenses/GPL-3.0",
         "name": "swh-indexer",
     }