# File: swh/indexer/metadata_dictionary/github.py
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import json
from typing import Any, Dict, Optional, Tuple

from swh.indexer.codemeta import ACTIVITYSTREAMS_URI, CROSSWALK_TABLE, FORGEFED_URI

from .base import BaseExtrinsicMapping, JsonMapping, produce_terms

# SPDX placeholder meaning "no license could be determined"; it is not a
# license identifier, so no https://spdx.org/licenses/ page exists for it.
_SPDX_NOASSERTION = "NOASSERTION"


def _prettyprint(d):
    """Print ``d`` as 4-space-indented JSON (used by this module's doctests)."""
    print(json.dumps(d, indent=4))


def _append_counted_collection(
    translated_metadata: Dict[str, Any], term: str, collection_type: str, count: Any
) -> None:
    """Append a collection node holding only its size under ``term``.

    Args:
        translated_metadata: document being built; modified in place.
        term: full URI of the property the collection is attached to.
        collection_type: full URI used as the node's ``@type``.
        count: number of items; anything that is not a genuine ``int`` is
            ignored and leaves ``translated_metadata`` untouched.
    """
    # bool is a subclass of int: reject it explicitly so that a JSON
    # true/false is never emitted as an item count.
    if isinstance(count, bool) or not isinstance(count, int):
        return
    translated_metadata.setdefault(term, []).append(
        {
            "@type": collection_type,
            ACTIVITYSTREAMS_URI + "totalItems": count,
        }
    )


class GitHubMapping(BaseExtrinsicMapping, JsonMapping):
    """Mapping for "application/vnd.github.v3+json" repository documents,
    based on the "GitHub" column of the crosswalk table."""

    name = "github"
    mapping = CROSSWALK_TABLE["GitHub"]
    # Payload keys declared as string fields to the base mapping.
    string_fields = [
        "archive_url",
        "created_at",
        "updated_at",
        "description",
        "full_name",
        "html_url",
        "issues_url",
    ]

    @classmethod
    def extrinsic_metadata_formats(cls) -> Tuple[str, ...]:
        """Return the extrinsic metadata formats handled by this mapping."""
        return ("application/vnd.github.v3+json",)

    def _translate_dict(self, content_dict: Dict[str, Any], **kwargs) -> Dict[str, Any]:
        """Translate ``content_dict`` with the parent implementation, then
        type the resulting document as a ForgeFed ``Repository``."""
        d = super()._translate_dict(content_dict, **kwargs)
        d["type"] = FORGEFED_URI + "Repository"
        return d

    @produce_terms(FORGEFED_URI, ["forks"])
    @produce_terms(ACTIVITYSTREAMS_URI, ["totalItems"])
    def translate_forks_count(
        self, translated_metadata: Dict[str, Any], v: Any
    ) -> None:
        """Record the number of forks as the size of a ``forge:forks``
        ordered collection; non-integer values are ignored.

        >>> translated_metadata = {}
        >>> GitHubMapping().translate_forks_count(translated_metadata, 42)
        >>> _prettyprint(translated_metadata)
        {
            "https://forgefed.org/ns#forks": [
                {
                    "@type": "https://www.w3.org/ns/activitystreams#OrderedCollection",
                    "https://www.w3.org/ns/activitystreams#totalItems": 42
                }
            ]
        }
        >>> translated_metadata = {}
        >>> GitHubMapping().translate_forks_count(translated_metadata, True)
        >>> translated_metadata
        {}
        """
        _append_counted_collection(
            translated_metadata,
            FORGEFED_URI + "forks",
            ACTIVITYSTREAMS_URI + "OrderedCollection",
            v,
        )

    @produce_terms(ACTIVITYSTREAMS_URI, ["likes"])
    @produce_terms(ACTIVITYSTREAMS_URI, ["totalItems"])
    def translate_stargazers_count(
        self, translated_metadata: Dict[str, Any], v: Any
    ) -> None:
        """Record the number of stargazers as the size of an ``as:likes``
        collection; non-integer values are ignored.

        >>> translated_metadata = {}
        >>> GitHubMapping().translate_stargazers_count(translated_metadata, 42)
        >>> _prettyprint(translated_metadata)
        {
            "https://www.w3.org/ns/activitystreams#likes": [
                {
                    "@type": "https://www.w3.org/ns/activitystreams#Collection",
                    "https://www.w3.org/ns/activitystreams#totalItems": 42
                }
            ]
        }
        """
        _append_counted_collection(
            translated_metadata,
            ACTIVITYSTREAMS_URI + "likes",
            ACTIVITYSTREAMS_URI + "Collection",
            v,
        )

    @produce_terms(ACTIVITYSTREAMS_URI, ["followers"])
    @produce_terms(ACTIVITYSTREAMS_URI, ["totalItems"])
    def translate_watchers_count(
        self, translated_metadata: Dict[str, Any], v: Any
    ) -> None:
        """Record the number of watchers as the size of an ``as:followers``
        collection; non-integer values are ignored.

        >>> translated_metadata = {}
        >>> GitHubMapping().translate_watchers_count(translated_metadata, 42)
        >>> _prettyprint(translated_metadata)
        {
            "https://www.w3.org/ns/activitystreams#followers": [
                {
                    "@type": "https://www.w3.org/ns/activitystreams#Collection",
                    "https://www.w3.org/ns/activitystreams#totalItems": 42
                }
            ]
        }
        """
        _append_counted_collection(
            translated_metadata,
            ACTIVITYSTREAMS_URI + "followers",
            ACTIVITYSTREAMS_URI + "Collection",
            v,
        )

    def normalize_license(self, d: Any) -> Optional[Dict[str, str]]:
        """Return a JSON-LD reference to the SPDX license named by
        ``d["spdx_id"]``, or None when no usable identifier is present.

        >>> GitHubMapping().normalize_license({'spdx_id': 'MIT'})
        {'@id': 'https://spdx.org/licenses/MIT'}
        >>> GitHubMapping().normalize_license({'spdx_id': 'NOASSERTION'}) is None
        True
        >>> GitHubMapping().normalize_license({'key': 'other'}) is None
        True
        """
        if not isinstance(d, dict):
            return None
        spdx_id = d.get("spdx_id")
        if not isinstance(spdx_id, str):
            return None
        # An empty id or the SPDX "NOASSERTION" placeholder does not name a
        # license; emitting a URI for it would point at a non-existent page.
        if not spdx_id or spdx_id == _SPDX_NOASSERTION:
            return None
        return {"@id": "https://spdx.org/licenses/" + spdx_id}
# File: swh/indexer/tests/metadata_dictionary/test_github.py
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from swh.indexer.metadata_dictionary import MAPPINGS

# JSON-LD context expected at the top of every translated document.
CONTEXT = [
    "https://doi.org/10.5063/schema/codemeta-2.0",
    {
        "as": "https://www.w3.org/ns/activitystreams#",
        "forge": "https://forgefed.org/ns#",
    },
]


def test_compute_metadata_none():
    """Translating empty content must return None."""
    content = b""

    result = MAPPINGS["GitHubMapping"]().translate(content)

    # None if no metadata was found or an error occurred
    assert result is None


def test_supported_terms():
    """Every term produced by the GitHub mapping is advertised, including
    the ones declared on the ``translate_*_count`` methods."""
    terms = MAPPINGS["GitHubMapping"].supported_terms()
    assert {
        "http://schema.org/name",
        "http://schema.org/license",
        "https://forgefed.org/ns#forks",
        "https://www.w3.org/ns/activitystreams#likes",
        "https://www.w3.org/ns/activitystreams#followers",
        "https://www.w3.org/ns/activitystreams#totalItems",
    } <= terms


def test_compute_metadata_github():
    """Translate a complete GitHub API repository document.

    ``stargazers_count`` and ``watchers_count`` deliberately differ so that
    ``as:likes`` and ``as:followers`` cannot be mixed up unnoticed.
    """
    content = b"""
{
  "id": 80521091,
  "node_id": "MDEwOlJlcG9zaXRvcnk4MDUyMTA5MQ==",
  "name": "swh-indexer",
  "full_name": "SoftwareHeritage/swh-indexer",
  "private": false,
  "owner": {
    "login": "SoftwareHeritage",
    "id": 18555939,
    "node_id": "MDEyOk9yZ2FuaXphdGlvbjE4NTU1OTM5",
    "avatar_url": "https://avatars.githubusercontent.com/u/18555939?v=4",
    "gravatar_id": "",
    "url": "https://api.github.com/users/SoftwareHeritage",
    "type": "Organization",
    "site_admin": false
  },
  "html_url": "https://github.com/SoftwareHeritage/swh-indexer",
  "description": "GitHub mirror of Metadata indexer",
  "fork": false,
  "url": "https://api.github.com/repos/SoftwareHeritage/swh-indexer",
  "created_at": "2017-01-31T13:05:39Z",
  "updated_at": "2022-06-22T08:02:20Z",
  "pushed_at": "2022-06-29T09:01:08Z",
  "git_url": "git://github.com/SoftwareHeritage/swh-indexer.git",
  "ssh_url": "git@github.com:SoftwareHeritage/swh-indexer.git",
  "clone_url": "https://github.com/SoftwareHeritage/swh-indexer.git",
  "svn_url": "https://github.com/SoftwareHeritage/swh-indexer",
  "homepage": "https://forge.softwareheritage.org/source/swh-indexer/",
  "size": 2713,
  "stargazers_count": 13,
  "watchers_count": 12,
  "language": "Python",
  "has_issues": false,
  "has_projects": false,
  "has_downloads": true,
  "has_wiki": false,
  "has_pages": false,
  "forks_count": 1,
  "mirror_url": null,
  "archived": false,
  "disabled": false,
  "open_issues_count": 0,
  "license": {
    "key": "gpl-3.0",
    "name": "GNU General Public License v3.0",
    "spdx_id": "GPL-3.0",
    "url": "https://api.github.com/licenses/gpl-3.0",
    "node_id": "MDc6TGljZW5zZTk="
  },
  "allow_forking": true,
  "is_template": false,
  "web_commit_signoff_required": false,
  "topics": [

  ],
  "visibility": "public",
  "forks": 1,
  "open_issues": 0,
  "watchers": 13,
  "default_branch": "master",
  "temp_clone_token": null,
  "organization": {
    "login": "SoftwareHeritage",
    "id": 18555939,
    "node_id": "MDEyOk9yZ2FuaXphdGlvbjE4NTU1OTM5",
    "avatar_url": "https://avatars.githubusercontent.com/u/18555939?v=4",
    "gravatar_id": "",
    "type": "Organization",
    "site_admin": false
  },
  "network_count": 1,
  "subscribers_count": 6
}

    """
    result = MAPPINGS["GitHubMapping"]().translate(content)
    assert result == {
        "@context": CONTEXT,
        "type": "https://forgefed.org/ns#Repository",
        "forge:forks": {
            "as:totalItems": 1,
            "type": "as:OrderedCollection",
        },
        "as:likes": {
            "as:totalItems": 13,
            "type": "as:Collection",
        },
        "as:followers": {
            "as:totalItems": 12,
            "type": "as:Collection",
        },
        "license": "https://spdx.org/licenses/GPL-3.0",
        "name": "SoftwareHeritage/swh-indexer",
        "description": "GitHub mirror of Metadata indexer",
        "schema:codeRepository": "https://github.com/SoftwareHeritage/swh-indexer",
        "schema:dateCreated": "2017-01-31T13:05:39Z",
        "schema:dateModified": "2022-06-22T08:02:20Z",
    }