diff --git a/swh/indexer/metadata_dictionary/github.py b/swh/indexer/metadata_dictionary/github.py index 2df751b..6bebbd5 100644 --- a/swh/indexer/metadata_dictionary/github.py +++ b/swh/indexer/metadata_dictionary/github.py @@ -1,73 +1,78 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from typing import Any, Dict, List, Tuple -from swh.indexer.codemeta import ACTIVITYSTREAMS_URI, FORGEFED_URI, SCHEMA_URI +from swh.indexer.codemeta import ACTIVITYSTREAMS_URI, CROSSWALK_TABLE, FORGEFED_URI from swh.indexer.storage.interface import Sha1 from .base import DirectoryLsEntry, JsonMapping, produce_terms def _prettyprint(d): print(json.dumps(d, indent=4)) class GitHubMapping(JsonMapping): name = "github" - mapping = { - "name": SCHEMA_URI + "name", - "license": SCHEMA_URI + "license", - } - string_fields = ["name"] + mapping = CROSSWALK_TABLE["GitHub"] + string_fields = [ + "archive_url", + "created_at", + "updated_at", + "description", + "full_name", + "html_url", + "issues_url", + ] @classmethod def detect_metadata_files(cls, file_entries: List[DirectoryLsEntry]) -> List[Sha1]: return [] @classmethod def extrinsic_metadata_formats(cls) -> Tuple[str, ...]: return ("application/vnd.github.v3+json",) def _translate_dict(self, content_dict: Dict[str, Any], **kwargs) -> Dict[str, Any]: d = super()._translate_dict(content_dict, **kwargs) d["type"] = FORGEFED_URI + "Repository" return d @produce_terms(FORGEFED_URI, ["forks"]) @produce_terms(ACTIVITYSTREAMS_URI, ["totalItems"]) def translate_forks_count( self, translated_metadata: Dict[str, Any], v: Any ) -> None: """ >>> translated_metadata = {} >>> GitHubMapping().translate_forks_count(translated_metadata, 42) >>> _prettyprint(translated_metadata) { "https://forgefed.org/ns#forks": [ { "@type": "https://www.w3.org/ns/activitystreams#OrderedCollection", "https://www.w3.org/ns/activitystreams#totalItems": 42 } ] } """ if isinstance(v, int): translated_metadata.setdefault(FORGEFED_URI + "forks", []).append( { "@type": ACTIVITYSTREAMS_URI + "OrderedCollection", ACTIVITYSTREAMS_URI + "totalItems": v, } ) def normalize_license(self, d): """ >>> GitHubMapping().normalize_license({'spdx_id': 'MIT'}) {'@id': 'https://spdx.org/licenses/MIT'} """ if isinstance(d, dict) and isinstance(d.get("spdx_id"), str): return {"@id": "https://spdx.org/licenses/" + d["spdx_id"]} diff --git a/swh/indexer/tests/metadata_dictionary/test_github.py b/swh/indexer/tests/metadata_dictionary/test_github.py index d6739ec..f5ec615 100644 --- a/swh/indexer/tests/metadata_dictionary/test_github.py +++ b/swh/indexer/tests/metadata_dictionary/test_github.py @@ -1,122 +1,126 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.indexer.metadata_dictionary import MAPPINGS def test_compute_metadata_none(): """ testing content empty content is empty should return None """ content = b"" # None if no metadata was found or an error occurred declared_metadata = None result = MAPPINGS["GitHubMapping"]().translate(content) assert declared_metadata == result def test_supported_terms(): terms = MAPPINGS["GitHubMapping"].supported_terms() assert { "http://schema.org/name", "http://schema.org/license", "https://forgefed.org/ns#forks", "https://www.w3.org/ns/activitystreams#totalItems", } <= terms def test_compute_metadata_github(): """ testing only computation of metadata with hard_mapping_npm """ content = b""" { "id": 80521091, "node_id": "MDEwOlJlcG9zaXRvcnk4MDUyMTA5MQ==", "name": "swh-indexer", "full_name": "SoftwareHeritage/swh-indexer", "private": false, "owner": { "login": "SoftwareHeritage", "id": 18555939, "node_id": "MDEyOk9yZ2FuaXphdGlvbjE4NTU1OTM5", "avatar_url": "https://avatars.githubusercontent.com/u/18555939?v=4", "gravatar_id": "", "url": "https://api.github.com/users/SoftwareHeritage", "type": "Organization", "site_admin": false }, "html_url": "https://github.com/SoftwareHeritage/swh-indexer", "description": "GitHub mirror of Metadata indexer", "fork": false, "url": "https://api.github.com/repos/SoftwareHeritage/swh-indexer", "created_at": "2017-01-31T13:05:39Z", "updated_at": "2022-06-22T08:02:20Z", "pushed_at": "2022-06-29T09:01:08Z", "git_url": "git://github.com/SoftwareHeritage/swh-indexer.git", "ssh_url": "git@github.com:SoftwareHeritage/swh-indexer.git", "clone_url": "https://github.com/SoftwareHeritage/swh-indexer.git", "svn_url": "https://github.com/SoftwareHeritage/swh-indexer", "homepage": "https://forge.softwareheritage.org/source/swh-indexer/", "size": 2713, "stargazers_count": 13, "watchers_count": 13, "language": "Python", "has_issues": false, "has_projects": false, "has_downloads": true, "has_wiki": false, "has_pages": false, "forks_count": 1, "mirror_url": null, "archived": false, "disabled": false, "open_issues_count": 0, "license": { "key": "gpl-3.0", "name": "GNU General Public License v3.0", "spdx_id": "GPL-3.0", "url": "https://api.github.com/licenses/gpl-3.0", "node_id": "MDc6TGljZW5zZTk=" }, "allow_forking": true, "is_template": false, "web_commit_signoff_required": false, "topics": [ ], "visibility": "public", "forks": 1, "open_issues": 0, "watchers": 13, "default_branch": "master", "temp_clone_token": null, "organization": { "login": "SoftwareHeritage", "id": 18555939, "node_id": "MDEyOk9yZ2FuaXphdGlvbjE4NTU1OTM5", "avatar_url": "https://avatars.githubusercontent.com/u/18555939?v=4", "gravatar_id": "", "type": "Organization", "site_admin": false }, "network_count": 1, "subscribers_count": 6 } """ result = MAPPINGS["GitHubMapping"]().translate(content) assert result == { "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "type": "https://forgefed.org/ns#Repository", "https://forgefed.org/ns#forks": { "https://www.w3.org/ns/activitystreams#totalItems": 1, "type": "https://www.w3.org/ns/activitystreams#OrderedCollection", }, "license": "https://spdx.org/licenses/GPL-3.0", - "name": "swh-indexer", + "name": "SoftwareHeritage/swh-indexer", + "description": "GitHub mirror of Metadata indexer", + "schema:codeRepository": "https://github.com/SoftwareHeritage/swh-indexer", + "schema:dateCreated": "2017-01-31T13:05:39Z", + "schema:dateModified": "2022-06-22T08:02:20Z", }