diff --git a/swh/loader/metadata/base.py b/swh/loader/metadata/base.py --- a/swh/loader/metadata/base.py +++ b/swh/loader/metadata/base.py @@ -171,6 +171,12 @@ return self._origin_metadata_objects + def get_parent_origins(self) -> List[Origin]: + """If the given origin is a "forge fork" (i.e. created with the "Fork" button + of GitHub-like forges), returns a list of origins it was forked from; + closest parent first. Subclasses must override this method.""" + raise NotImplementedError(f"{self.__class__.__name__}.get_parent_origins") + if TYPE_CHECKING: # Makes mypy check BaseMetadataFetcher follows the MetadataFetcherProtocol diff --git a/swh/loader/metadata/github.py b/swh/loader/metadata/github.py --- a/swh/loader/metadata/github.py +++ b/swh/loader/metadata/github.py @@ -5,11 +5,13 @@ """Metadata fetcher for GitHub.""" +import json import re from typing import List, Optional, Tuple import urllib.parse from swh.lister.github.utils import GitHubSession +from swh.model.model import Origin from . import USER_AGENT from .base import BaseMetadataFetcher, InvalidOrigin @@ -67,3 +69,18 @@ # archiving verbatim, though. 
return [(METADATA_FORMAT, metadata_bytes)]
+
+    def get_parent_origins(self) -> List[Origin]:
+        """Return the origins this repository was forked from, closest first."""
+        parents: List[Origin] = []
+        for metadata in self.get_origin_metadata():
+            if metadata.format != METADATA_FORMAT:
+                continue
+            data = json.loads(metadata.metadata)
+            # Either key may be absent or null, so never index a missing entry.
+            for repo in (data.get("parent"), data.get("source")):
+                origin = Origin(url=repo["html_url"]) if repo else None
+                if origin is not None and origin not in parents:
+                    parents.append(origin)
+
+        return parents
diff --git a/swh/loader/metadata/tests/data/https_api.github.com/repos_jmarlena_linguist b/swh/loader/metadata/tests/data/https_api.github.com/repos_jmarlena_linguist new file mode 100644 --- /dev/null +++ b/swh/loader/metadata/tests/data/https_api.github.com/repos_jmarlena_linguist @@ -0,0 +1,324 @@ +{ + "id": 64780761, + "node_id": "MDEwOlJlcG9zaXRvcnk2NDc4MDc2MQ==", + "name": "linguist", + "full_name": "jmarlena/linguist", + "private": false, + "owner": { + "login": "jmarlena", + "id": 6732600, + "node_id": "MDQ6VXNlcjY3MzI2MDA=", + "avatar_url": "https://avatars.githubusercontent.com/u/6732600?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/jmarlena", + "html_url": "https://github.com/jmarlena", + "followers_url": "https://api.github.com/users/jmarlena/followers", + "following_url": "https://api.github.com/users/jmarlena/following{/other_user}", + "gists_url": "https://api.github.com/users/jmarlena/gists{/gist_id}", + "starred_url": "https://api.github.com/users/jmarlena/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/jmarlena/subscriptions", + "organizations_url": "https://api.github.com/users/jmarlena/orgs", + "repos_url": "https://api.github.com/users/jmarlena/repos", + "events_url": "https://api.github.com/users/jmarlena/events{/privacy}", + "received_events_url": "https://api.github.com/users/jmarlena/received_events", + "type": "User", + "site_admin": false + }, + "html_url": 
"https://github.com/jmarlena/linguist", + "description": "Language Savant. If your repository's language is being reported incorrectly, send us a pull request!", + "fork": true, + "url": "https://api.github.com/repos/jmarlena/linguist", + "forks_url": "https://api.github.com/repos/jmarlena/linguist/forks", + "keys_url": "https://api.github.com/repos/jmarlena/linguist/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/jmarlena/linguist/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/jmarlena/linguist/teams", + "hooks_url": "https://api.github.com/repos/jmarlena/linguist/hooks", + "issue_events_url": "https://api.github.com/repos/jmarlena/linguist/issues/events{/number}", + "events_url": "https://api.github.com/repos/jmarlena/linguist/events", + "assignees_url": "https://api.github.com/repos/jmarlena/linguist/assignees{/user}", + "branches_url": "https://api.github.com/repos/jmarlena/linguist/branches{/branch}", + "tags_url": "https://api.github.com/repos/jmarlena/linguist/tags", + "blobs_url": "https://api.github.com/repos/jmarlena/linguist/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/jmarlena/linguist/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/jmarlena/linguist/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/jmarlena/linguist/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/jmarlena/linguist/statuses/{sha}", + "languages_url": "https://api.github.com/repos/jmarlena/linguist/languages", + "stargazers_url": "https://api.github.com/repos/jmarlena/linguist/stargazers", + "contributors_url": "https://api.github.com/repos/jmarlena/linguist/contributors", + "subscribers_url": "https://api.github.com/repos/jmarlena/linguist/subscribers", + "subscription_url": "https://api.github.com/repos/jmarlena/linguist/subscription", + "commits_url": "https://api.github.com/repos/jmarlena/linguist/commits{/sha}", + "git_commits_url": 
"https://api.github.com/repos/jmarlena/linguist/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/jmarlena/linguist/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/jmarlena/linguist/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/jmarlena/linguist/contents/{+path}", + "compare_url": "https://api.github.com/repos/jmarlena/linguist/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/jmarlena/linguist/merges", + "archive_url": "https://api.github.com/repos/jmarlena/linguist/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/jmarlena/linguist/downloads", + "issues_url": "https://api.github.com/repos/jmarlena/linguist/issues{/number}", + "pulls_url": "https://api.github.com/repos/jmarlena/linguist/pulls{/number}", + "milestones_url": "https://api.github.com/repos/jmarlena/linguist/milestones{/number}", + "notifications_url": "https://api.github.com/repos/jmarlena/linguist/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/jmarlena/linguist/labels{/name}", + "releases_url": "https://api.github.com/repos/jmarlena/linguist/releases{/id}", + "deployments_url": "https://api.github.com/repos/jmarlena/linguist/deployments", + "created_at": "2016-08-02T18:10:46Z", + "updated_at": "2021-04-25T02:01:40Z", + "pushed_at": "2016-08-02T19:05:42Z", + "git_url": "git://github.com/jmarlena/linguist.git", + "ssh_url": "git@github.com:jmarlena/linguist.git", + "clone_url": "https://github.com/jmarlena/linguist.git", + "svn_url": "https://github.com/jmarlena/linguist", + "homepage": "", + "size": 32899, + "stargazers_count": 0, + "watchers_count": 0, + "language": "Ruby", + "has_issues": false, + "has_projects": true, + "has_downloads": true, + "has_wiki": false, + "has_pages": false, + "forks_count": 1, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 0, + "license": { + "key": "mit", + "name": "MIT 
License", + "spdx_id": "MIT", + "url": "https://api.github.com/licenses/mit", + "node_id": "MDc6TGljZW5zZTEz" + }, + "allow_forking": true, + "is_template": false, + "topics": [ + + ], + "visibility": "public", + "forks": 1, + "open_issues": 0, + "watchers": 0, + "default_branch": "master", + "temp_clone_token": null, + "parent": { + "id": 64778136, + "node_id": "MDEwOlJlcG9zaXRvcnk2NDc3ODEzNg==", + "name": "linguist", + "full_name": "octocat/linguist", + "private": false, + "owner": { + "login": "octocat", + "id": 583231, + "node_id": "MDQ6VXNlcjU4MzIzMQ==", + "avatar_url": "https://avatars.githubusercontent.com/u/583231?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/octocat", + "html_url": "https://github.com/octocat", + "followers_url": "https://api.github.com/users/octocat/followers", + "following_url": "https://api.github.com/users/octocat/following{/other_user}", + "gists_url": "https://api.github.com/users/octocat/gists{/gist_id}", + "starred_url": "https://api.github.com/users/octocat/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/octocat/subscriptions", + "organizations_url": "https://api.github.com/users/octocat/orgs", + "repos_url": "https://api.github.com/users/octocat/repos", + "events_url": "https://api.github.com/users/octocat/events{/privacy}", + "received_events_url": "https://api.github.com/users/octocat/received_events", + "type": "User", + "site_admin": false + }, + "html_url": "https://github.com/octocat/linguist", + "description": "Language Savant. 
If your repository's language is being reported incorrectly, send us a pull request!", + "fork": true, + "url": "https://api.github.com/repos/octocat/linguist", + "forks_url": "https://api.github.com/repos/octocat/linguist/forks", + "keys_url": "https://api.github.com/repos/octocat/linguist/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/octocat/linguist/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/octocat/linguist/teams", + "hooks_url": "https://api.github.com/repos/octocat/linguist/hooks", + "issue_events_url": "https://api.github.com/repos/octocat/linguist/issues/events{/number}", + "events_url": "https://api.github.com/repos/octocat/linguist/events", + "assignees_url": "https://api.github.com/repos/octocat/linguist/assignees{/user}", + "branches_url": "https://api.github.com/repos/octocat/linguist/branches{/branch}", + "tags_url": "https://api.github.com/repos/octocat/linguist/tags", + "blobs_url": "https://api.github.com/repos/octocat/linguist/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/octocat/linguist/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/octocat/linguist/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/octocat/linguist/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/octocat/linguist/statuses/{sha}", + "languages_url": "https://api.github.com/repos/octocat/linguist/languages", + "stargazers_url": "https://api.github.com/repos/octocat/linguist/stargazers", + "contributors_url": "https://api.github.com/repos/octocat/linguist/contributors", + "subscribers_url": "https://api.github.com/repos/octocat/linguist/subscribers", + "subscription_url": "https://api.github.com/repos/octocat/linguist/subscription", + "commits_url": "https://api.github.com/repos/octocat/linguist/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/octocat/linguist/git/commits{/sha}", + "comments_url": 
"https://api.github.com/repos/octocat/linguist/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/octocat/linguist/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/octocat/linguist/contents/{+path}", + "compare_url": "https://api.github.com/repos/octocat/linguist/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/octocat/linguist/merges", + "archive_url": "https://api.github.com/repos/octocat/linguist/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/octocat/linguist/downloads", + "issues_url": "https://api.github.com/repos/octocat/linguist/issues{/number}", + "pulls_url": "https://api.github.com/repos/octocat/linguist/pulls{/number}", + "milestones_url": "https://api.github.com/repos/octocat/linguist/milestones{/number}", + "notifications_url": "https://api.github.com/repos/octocat/linguist/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/octocat/linguist/labels{/name}", + "releases_url": "https://api.github.com/repos/octocat/linguist/releases{/id}", + "deployments_url": "https://api.github.com/repos/octocat/linguist/deployments", + "created_at": "2016-08-02T17:35:14Z", + "updated_at": "2022-04-26T12:31:02Z", + "pushed_at": "2022-04-26T09:28:02Z", + "git_url": "git://github.com/octocat/linguist.git", + "ssh_url": "git@github.com:octocat/linguist.git", + "clone_url": "https://github.com/octocat/linguist.git", + "svn_url": "https://github.com/octocat/linguist", + "homepage": "", + "size": 32899, + "stargazers_count": 119, + "watchers_count": 119, + "language": "Ruby", + "has_issues": false, + "has_projects": true, + "has_downloads": true, + "has_wiki": false, + "has_pages": false, + "forks_count": 129, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 10, + "license": { + "key": "mit", + "name": "MIT License", + "spdx_id": "MIT", + "url": "https://api.github.com/licenses/mit", + "node_id": 
"MDc6TGljZW5zZTEz" + }, + "allow_forking": true, + "is_template": false, + "topics": [ + + ], + "visibility": "public", + "forks": 129, + "open_issues": 10, + "watchers": 119, + "default_branch": "master" + }, + "source": { + "id": 1725199, + "node_id": "MDEwOlJlcG9zaXRvcnkxNzI1MTk5", + "name": "linguist", + "full_name": "github/linguist", + "private": false, + "owner": { + "login": "github", + "id": 9919, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjk5MTk=", + "avatar_url": "https://avatars.githubusercontent.com/u/9919?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/github", + "html_url": "https://github.com/github", + "followers_url": "https://api.github.com/users/github/followers", + "following_url": "https://api.github.com/users/github/following{/other_user}", + "gists_url": "https://api.github.com/users/github/gists{/gist_id}", + "starred_url": "https://api.github.com/users/github/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/github/subscriptions", + "organizations_url": "https://api.github.com/users/github/orgs", + "repos_url": "https://api.github.com/users/github/repos", + "events_url": "https://api.github.com/users/github/events{/privacy}", + "received_events_url": "https://api.github.com/users/github/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/github/linguist", + "description": "Language Savant. 
If your repository's language is being reported incorrectly, send us a pull request!", + "fork": false, + "url": "https://api.github.com/repos/github/linguist", + "forks_url": "https://api.github.com/repos/github/linguist/forks", + "keys_url": "https://api.github.com/repos/github/linguist/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/github/linguist/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/github/linguist/teams", + "hooks_url": "https://api.github.com/repos/github/linguist/hooks", + "issue_events_url": "https://api.github.com/repos/github/linguist/issues/events{/number}", + "events_url": "https://api.github.com/repos/github/linguist/events", + "assignees_url": "https://api.github.com/repos/github/linguist/assignees{/user}", + "branches_url": "https://api.github.com/repos/github/linguist/branches{/branch}", + "tags_url": "https://api.github.com/repos/github/linguist/tags", + "blobs_url": "https://api.github.com/repos/github/linguist/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/github/linguist/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/github/linguist/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/github/linguist/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/github/linguist/statuses/{sha}", + "languages_url": "https://api.github.com/repos/github/linguist/languages", + "stargazers_url": "https://api.github.com/repos/github/linguist/stargazers", + "contributors_url": "https://api.github.com/repos/github/linguist/contributors", + "subscribers_url": "https://api.github.com/repos/github/linguist/subscribers", + "subscription_url": "https://api.github.com/repos/github/linguist/subscription", + "commits_url": "https://api.github.com/repos/github/linguist/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/github/linguist/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/github/linguist/comments{/number}", + 
"issue_comment_url": "https://api.github.com/repos/github/linguist/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/github/linguist/contents/{+path}", + "compare_url": "https://api.github.com/repos/github/linguist/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/github/linguist/merges", + "archive_url": "https://api.github.com/repos/github/linguist/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/github/linguist/downloads", + "issues_url": "https://api.github.com/repos/github/linguist/issues{/number}", + "pulls_url": "https://api.github.com/repos/github/linguist/pulls{/number}", + "milestones_url": "https://api.github.com/repos/github/linguist/milestones{/number}", + "notifications_url": "https://api.github.com/repos/github/linguist/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/github/linguist/labels{/name}", + "releases_url": "https://api.github.com/repos/github/linguist/releases{/id}", + "deployments_url": "https://api.github.com/repos/github/linguist/deployments", + "created_at": "2011-05-09T22:53:13Z", + "updated_at": "2022-04-26T13:05:32Z", + "pushed_at": "2022-04-26T10:27:23Z", + "git_url": "git://github.com/github/linguist.git", + "ssh_url": "git@github.com:github/linguist.git", + "clone_url": "https://github.com/github/linguist.git", + "svn_url": "https://github.com/github/linguist", + "homepage": "", + "size": 38000, + "stargazers_count": 9750, + "watchers_count": 9750, + "language": "Ruby", + "has_issues": true, + "has_projects": false, + "has_downloads": true, + "has_wiki": false, + "has_pages": false, + "forks_count": 3538, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 106, + "license": { + "key": "mit", + "name": "MIT License", + "spdx_id": "MIT", + "url": "https://api.github.com/licenses/mit", + "node_id": "MDc6TGljZW5zZTEz" + }, + "allow_forking": true, + "is_template": false, + "topics": [ + 
"language-grammars", + "language-statistics", + "linguistic", + "syntax-highlighting" + ], + "visibility": "public", + "forks": 3538, + "open_issues": 106, + "watchers": 9750, + "default_branch": "master" + }, + "network_count": 3538, + "subscribers_count": 1 +} diff --git a/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_linguist b/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_linguist new file mode 100644 --- /dev/null +++ b/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_linguist @@ -0,0 +1,327 @@ +{ + "id": 64778136, + "node_id": "MDEwOlJlcG9zaXRvcnk2NDc3ODEzNg==", + "name": "linguist", + "full_name": "octocat/linguist", + "private": false, + "owner": { + "login": "octocat", + "id": 583231, + "node_id": "MDQ6VXNlcjU4MzIzMQ==", + "avatar_url": "https://avatars.githubusercontent.com/u/583231?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/octocat", + "html_url": "https://github.com/octocat", + "followers_url": "https://api.github.com/users/octocat/followers", + "following_url": "https://api.github.com/users/octocat/following{/other_user}", + "gists_url": "https://api.github.com/users/octocat/gists{/gist_id}", + "starred_url": "https://api.github.com/users/octocat/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/octocat/subscriptions", + "organizations_url": "https://api.github.com/users/octocat/orgs", + "repos_url": "https://api.github.com/users/octocat/repos", + "events_url": "https://api.github.com/users/octocat/events{/privacy}", + "received_events_url": "https://api.github.com/users/octocat/received_events", + "type": "User", + "site_admin": false + }, + "html_url": "https://github.com/octocat/linguist", + "description": "Language Savant. 
If your repository's language is being reported incorrectly, send us a pull request!", + "fork": true, + "url": "https://api.github.com/repos/octocat/linguist", + "forks_url": "https://api.github.com/repos/octocat/linguist/forks", + "keys_url": "https://api.github.com/repos/octocat/linguist/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/octocat/linguist/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/octocat/linguist/teams", + "hooks_url": "https://api.github.com/repos/octocat/linguist/hooks", + "issue_events_url": "https://api.github.com/repos/octocat/linguist/issues/events{/number}", + "events_url": "https://api.github.com/repos/octocat/linguist/events", + "assignees_url": "https://api.github.com/repos/octocat/linguist/assignees{/user}", + "branches_url": "https://api.github.com/repos/octocat/linguist/branches{/branch}", + "tags_url": "https://api.github.com/repos/octocat/linguist/tags", + "blobs_url": "https://api.github.com/repos/octocat/linguist/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/octocat/linguist/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/octocat/linguist/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/octocat/linguist/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/octocat/linguist/statuses/{sha}", + "languages_url": "https://api.github.com/repos/octocat/linguist/languages", + "stargazers_url": "https://api.github.com/repos/octocat/linguist/stargazers", + "contributors_url": "https://api.github.com/repos/octocat/linguist/contributors", + "subscribers_url": "https://api.github.com/repos/octocat/linguist/subscribers", + "subscription_url": "https://api.github.com/repos/octocat/linguist/subscription", + "commits_url": "https://api.github.com/repos/octocat/linguist/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/octocat/linguist/git/commits{/sha}", + "comments_url": 
"https://api.github.com/repos/octocat/linguist/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/octocat/linguist/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/octocat/linguist/contents/{+path}", + "compare_url": "https://api.github.com/repos/octocat/linguist/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/octocat/linguist/merges", + "archive_url": "https://api.github.com/repos/octocat/linguist/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/octocat/linguist/downloads", + "issues_url": "https://api.github.com/repos/octocat/linguist/issues{/number}", + "pulls_url": "https://api.github.com/repos/octocat/linguist/pulls{/number}", + "milestones_url": "https://api.github.com/repos/octocat/linguist/milestones{/number}", + "notifications_url": "https://api.github.com/repos/octocat/linguist/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/octocat/linguist/labels{/name}", + "releases_url": "https://api.github.com/repos/octocat/linguist/releases{/id}", + "deployments_url": "https://api.github.com/repos/octocat/linguist/deployments", + "created_at": "2016-08-02T17:35:14Z", + "updated_at": "2022-04-23T17:19:19Z", + "pushed_at": "2022-04-26T09:28:02Z", + "git_url": "git://github.com/octocat/linguist.git", + "ssh_url": "git@github.com:octocat/linguist.git", + "clone_url": "https://github.com/octocat/linguist.git", + "svn_url": "https://github.com/octocat/linguist", + "homepage": "", + "size": 32899, + "stargazers_count": 120, + "watchers_count": 120, + "language": "Ruby", + "has_issues": false, + "has_projects": true, + "has_downloads": true, + "has_wiki": false, + "has_pages": false, + "forks_count": 129, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 10, + "license": { + "key": "mit", + "name": "MIT License", + "spdx_id": "MIT", + "url": "https://api.github.com/licenses/mit", + "node_id": 
"MDc6TGljZW5zZTEz" + }, + "allow_forking": true, + "is_template": false, + "topics": [ + + ], + "visibility": "public", + "forks": 129, + "open_issues": 10, + "watchers": 120, + "default_branch": "master", + "temp_clone_token": null, + "parent": { + "id": 1725199, + "node_id": "MDEwOlJlcG9zaXRvcnkxNzI1MTk5", + "name": "linguist", + "full_name": "github/linguist", + "private": false, + "owner": { + "login": "github", + "id": 9919, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjk5MTk=", + "avatar_url": "https://avatars.githubusercontent.com/u/9919?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/github", + "html_url": "https://github.com/github", + "followers_url": "https://api.github.com/users/github/followers", + "following_url": "https://api.github.com/users/github/following{/other_user}", + "gists_url": "https://api.github.com/users/github/gists{/gist_id}", + "starred_url": "https://api.github.com/users/github/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/github/subscriptions", + "organizations_url": "https://api.github.com/users/github/orgs", + "repos_url": "https://api.github.com/users/github/repos", + "events_url": "https://api.github.com/users/github/events{/privacy}", + "received_events_url": "https://api.github.com/users/github/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/github/linguist", + "description": "Language Savant. 
If your repository's language is being reported incorrectly, send us a pull request!", + "fork": false, + "url": "https://api.github.com/repos/github/linguist", + "forks_url": "https://api.github.com/repos/github/linguist/forks", + "keys_url": "https://api.github.com/repos/github/linguist/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/github/linguist/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/github/linguist/teams", + "hooks_url": "https://api.github.com/repos/github/linguist/hooks", + "issue_events_url": "https://api.github.com/repos/github/linguist/issues/events{/number}", + "events_url": "https://api.github.com/repos/github/linguist/events", + "assignees_url": "https://api.github.com/repos/github/linguist/assignees{/user}", + "branches_url": "https://api.github.com/repos/github/linguist/branches{/branch}", + "tags_url": "https://api.github.com/repos/github/linguist/tags", + "blobs_url": "https://api.github.com/repos/github/linguist/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/github/linguist/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/github/linguist/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/github/linguist/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/github/linguist/statuses/{sha}", + "languages_url": "https://api.github.com/repos/github/linguist/languages", + "stargazers_url": "https://api.github.com/repos/github/linguist/stargazers", + "contributors_url": "https://api.github.com/repos/github/linguist/contributors", + "subscribers_url": "https://api.github.com/repos/github/linguist/subscribers", + "subscription_url": "https://api.github.com/repos/github/linguist/subscription", + "commits_url": "https://api.github.com/repos/github/linguist/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/github/linguist/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/github/linguist/comments{/number}", + 
"issue_comment_url": "https://api.github.com/repos/github/linguist/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/github/linguist/contents/{+path}", + "compare_url": "https://api.github.com/repos/github/linguist/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/github/linguist/merges", + "archive_url": "https://api.github.com/repos/github/linguist/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/github/linguist/downloads", + "issues_url": "https://api.github.com/repos/github/linguist/issues{/number}", + "pulls_url": "https://api.github.com/repos/github/linguist/pulls{/number}", + "milestones_url": "https://api.github.com/repos/github/linguist/milestones{/number}", + "notifications_url": "https://api.github.com/repos/github/linguist/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/github/linguist/labels{/name}", + "releases_url": "https://api.github.com/repos/github/linguist/releases{/id}", + "deployments_url": "https://api.github.com/repos/github/linguist/deployments", + "created_at": "2011-05-09T22:53:13Z", + "updated_at": "2022-04-26T08:44:55Z", + "pushed_at": "2022-04-26T10:27:23Z", + "git_url": "git://github.com/github/linguist.git", + "ssh_url": "git@github.com:github/linguist.git", + "clone_url": "https://github.com/github/linguist.git", + "svn_url": "https://github.com/github/linguist", + "homepage": "", + "size": 38000, + "stargazers_count": 9751, + "watchers_count": 9751, + "language": "Ruby", + "has_issues": true, + "has_projects": false, + "has_downloads": true, + "has_wiki": false, + "has_pages": false, + "forks_count": 3539, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 107, + "license": { + "key": "mit", + "name": "MIT License", + "spdx_id": "MIT", + "url": "https://api.github.com/licenses/mit", + "node_id": "MDc6TGljZW5zZTEz" + }, + "allow_forking": true, + "is_template": false, + "topics": [ + 
"language-grammars", + "language-statistics", + "linguistic", + "syntax-highlighting" + ], + "visibility": "public", + "forks": 3539, + "open_issues": 107, + "watchers": 9751, + "default_branch": "master" + }, + "source": { + "id": 1725199, + "node_id": "MDEwOlJlcG9zaXRvcnkxNzI1MTk5", + "name": "linguist", + "full_name": "github/linguist", + "private": false, + "owner": { + "login": "github", + "id": 9919, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjk5MTk=", + "avatar_url": "https://avatars.githubusercontent.com/u/9919?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/github", + "html_url": "https://github.com/github", + "followers_url": "https://api.github.com/users/github/followers", + "following_url": "https://api.github.com/users/github/following{/other_user}", + "gists_url": "https://api.github.com/users/github/gists{/gist_id}", + "starred_url": "https://api.github.com/users/github/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/github/subscriptions", + "organizations_url": "https://api.github.com/users/github/orgs", + "repos_url": "https://api.github.com/users/github/repos", + "events_url": "https://api.github.com/users/github/events{/privacy}", + "received_events_url": "https://api.github.com/users/github/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/github/linguist", + "description": "Language Savant. 
If your repository's language is being reported incorrectly, send us a pull request!", + "fork": false, + "url": "https://api.github.com/repos/github/linguist", + "forks_url": "https://api.github.com/repos/github/linguist/forks", + "keys_url": "https://api.github.com/repos/github/linguist/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/github/linguist/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/github/linguist/teams", + "hooks_url": "https://api.github.com/repos/github/linguist/hooks", + "issue_events_url": "https://api.github.com/repos/github/linguist/issues/events{/number}", + "events_url": "https://api.github.com/repos/github/linguist/events", + "assignees_url": "https://api.github.com/repos/github/linguist/assignees{/user}", + "branches_url": "https://api.github.com/repos/github/linguist/branches{/branch}", + "tags_url": "https://api.github.com/repos/github/linguist/tags", + "blobs_url": "https://api.github.com/repos/github/linguist/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/github/linguist/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/github/linguist/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/github/linguist/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/github/linguist/statuses/{sha}", + "languages_url": "https://api.github.com/repos/github/linguist/languages", + "stargazers_url": "https://api.github.com/repos/github/linguist/stargazers", + "contributors_url": "https://api.github.com/repos/github/linguist/contributors", + "subscribers_url": "https://api.github.com/repos/github/linguist/subscribers", + "subscription_url": "https://api.github.com/repos/github/linguist/subscription", + "commits_url": "https://api.github.com/repos/github/linguist/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/github/linguist/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/github/linguist/comments{/number}", + 
"issue_comment_url": "https://api.github.com/repos/github/linguist/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/github/linguist/contents/{+path}", + "compare_url": "https://api.github.com/repos/github/linguist/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/github/linguist/merges", + "archive_url": "https://api.github.com/repos/github/linguist/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/github/linguist/downloads", + "issues_url": "https://api.github.com/repos/github/linguist/issues{/number}", + "pulls_url": "https://api.github.com/repos/github/linguist/pulls{/number}", + "milestones_url": "https://api.github.com/repos/github/linguist/milestones{/number}", + "notifications_url": "https://api.github.com/repos/github/linguist/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/github/linguist/labels{/name}", + "releases_url": "https://api.github.com/repos/github/linguist/releases{/id}", + "deployments_url": "https://api.github.com/repos/github/linguist/deployments", + "created_at": "2011-05-09T22:53:13Z", + "updated_at": "2022-04-26T08:44:55Z", + "pushed_at": "2022-04-26T10:27:23Z", + "git_url": "git://github.com/github/linguist.git", + "ssh_url": "git@github.com:github/linguist.git", + "clone_url": "https://github.com/github/linguist.git", + "svn_url": "https://github.com/github/linguist", + "homepage": "", + "size": 38000, + "stargazers_count": 9751, + "watchers_count": 9751, + "language": "Ruby", + "has_issues": true, + "has_projects": false, + "has_downloads": true, + "has_wiki": false, + "has_pages": false, + "forks_count": 3539, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 107, + "license": { + "key": "mit", + "name": "MIT License", + "spdx_id": "MIT", + "url": "https://api.github.com/licenses/mit", + "node_id": "MDc6TGljZW5zZTEz" + }, + "allow_forking": true, + "is_template": false, + "topics": [ + 
"language-grammars", + "language-statistics", + "linguistic", + "syntax-highlighting" + ], + "visibility": "public", + "forks": 3539, + "open_issues": 107, + "watchers": 9751, + "default_branch": "master" + }, + "network_count": 3539, + "subscribers_count": 29 +} diff --git a/swh/loader/metadata/tests/test_base.py b/swh/loader/metadata/tests/test_base.py --- a/swh/loader/metadata/tests/test_base.py +++ b/swh/loader/metadata/tests/test_base.py @@ -70,6 +70,9 @@ def get_origin_metadata(self): return [REMD] + def get_parent_origins(self): + return [] + def test_load(swh_storage, mocker): mocker.patch( diff --git a/swh/loader/metadata/tests/test_github.py b/swh/loader/metadata/tests/test_github.py --- a/swh/loader/metadata/tests/test_github.py +++ b/swh/loader/metadata/tests/test_github.py @@ -23,6 +23,8 @@ from .test_base import DummyLoader ORIGIN = Origin("https://github.com/octocat/Hello-World") +FORKED_ORIGIN = Origin("https://github.com/octocat/linguist") +DOUBLE_FORKED_ORIGIN = Origin("https://github.com/jmarlena/linguist") METADATA_AUTHORITY = MetadataAuthority( type=MetadataAuthorityType.FORGE, url="https://github.com" @@ -68,6 +70,37 @@ ) assert fetcher.get_origin_metadata() == [expected_metadata(now, datadir)] + assert fetcher.get_parent_origins() == [] + + +def test_github_metadata_fork(datadir, requests_mock_datadir, mocker): + now = datetime.datetime.now(tz=datetime.timezone.utc) + mocker.patch("swh.loader.metadata.base.now", return_value=now) + + fetcher = GitHubMetadataFetcher( + FORKED_ORIGIN, credentials=None, lister_name="github", lister_instance_name="" + ) + + assert fetcher.get_parent_origins() == [ + Origin(url="https://github.com/github/linguist"), + ] + + +def test_github_metadata_fork_of_fork(datadir, requests_mock_datadir, mocker): + now = datetime.datetime.now(tz=datetime.timezone.utc) + mocker.patch("swh.loader.metadata.base.now", return_value=now) + + fetcher = GitHubMetadataFetcher( + DOUBLE_FORKED_ORIGIN, + credentials=None, + 
lister_name="github", + lister_instance_name="", + ) + + assert fetcher.get_parent_origins() == [ + Origin(url="https://github.com/octocat/linguist"), + Origin(url="https://github.com/github/linguist"), + ] def test_github_metadata_from_loader(