Page MenuHomeSoftware Heritage

D7663.id27728.diff
No OneTemporary

D7663.id27728.diff

diff --git a/swh/loader/metadata/base.py b/swh/loader/metadata/base.py
--- a/swh/loader/metadata/base.py
+++ b/swh/loader/metadata/base.py
@@ -171,6 +171,11 @@
return self._origin_metadata_objects
+ def get_parent_origin(self) -> Optional[Origin]:
+     """Return the origin this fetcher's origin was forked from.
+
+     A "forge fork" is a repository created with the "Fork" button of
+     GitHub-like forges. This base implementation never returns a value:
+     it unconditionally raises.
+
+     Raises:
+         NotImplementedError: always; the message is the name of the
+             concrete class followed by ``.get_parent_origin``.
+     """
+     owner = self.__class__.__name__
+     raise NotImplementedError(f"{owner}.get_parent_origin")
+
if TYPE_CHECKING:
# Makes mypy check BaseMetadataFetcher follows the MetadataFetcherProtocol
diff --git a/swh/loader/metadata/github.py b/swh/loader/metadata/github.py
--- a/swh/loader/metadata/github.py
+++ b/swh/loader/metadata/github.py
@@ -5,11 +5,13 @@
"""Metadata fetcher for GitHub."""
+import json
import re
from typing import List, Optional, Tuple
import urllib.parse
from swh.lister.github.utils import GitHubSession
+from swh.model.model import Origin
from . import USER_AGENT
from .base import BaseMetadataFetcher, InvalidOrigin
@@ -67,3 +69,17 @@
# archiving verbatim, though.
return [(METADATA_FORMAT, metadata_bytes)]
+
+ def get_parent_origin(self) -> Optional[Origin]:
+     """Return the GitHub origin this repository was forked from, if any.
+
+     Walks the objects returned by ``get_origin_metadata()`` and, for the
+     first one in ``METADATA_FORMAT`` whose JSON document has a non-null
+     ``"parent"`` entry, builds ``https://github.com/<parent full_name>``.
+
+     Returns:
+         The parent ``Origin``, or ``None`` when no metadata object names
+         a parent.
+     """
+     for entry in self.get_origin_metadata():
+         if entry.format == METADATA_FORMAT:
+             parent_repo = json.loads(entry.metadata).get("parent")
+             if parent_repo is not None:
+                 # urlunsplit supplies the "/" between host and path.
+                 parts = ("https", "github.com", parent_repo["full_name"], "", "")
+                 return Origin(url=urllib.parse.urlunsplit(parts))
+
+     return None
diff --git a/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_linguist b/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_linguist
new file mode 100644
--- /dev/null
+++ b/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_linguist
@@ -0,0 +1,327 @@
+{
+ "id": 64778136,
+ "node_id": "MDEwOlJlcG9zaXRvcnk2NDc3ODEzNg==",
+ "name": "linguist",
+ "full_name": "octocat/linguist",
+ "private": false,
+ "owner": {
+ "login": "octocat",
+ "id": 583231,
+ "node_id": "MDQ6VXNlcjU4MzIzMQ==",
+ "avatar_url": "https://avatars.githubusercontent.com/u/583231?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/octocat",
+ "html_url": "https://github.com/octocat",
+ "followers_url": "https://api.github.com/users/octocat/followers",
+ "following_url": "https://api.github.com/users/octocat/following{/other_user}",
+ "gists_url": "https://api.github.com/users/octocat/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/octocat/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/octocat/subscriptions",
+ "organizations_url": "https://api.github.com/users/octocat/orgs",
+ "repos_url": "https://api.github.com/users/octocat/repos",
+ "events_url": "https://api.github.com/users/octocat/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/octocat/received_events",
+ "type": "User",
+ "site_admin": false
+ },
+ "html_url": "https://github.com/octocat/linguist",
+ "description": "Language Savant. If your repository's language is being reported incorrectly, send us a pull request!",
+ "fork": true,
+ "url": "https://api.github.com/repos/octocat/linguist",
+ "forks_url": "https://api.github.com/repos/octocat/linguist/forks",
+ "keys_url": "https://api.github.com/repos/octocat/linguist/keys{/key_id}",
+ "collaborators_url": "https://api.github.com/repos/octocat/linguist/collaborators{/collaborator}",
+ "teams_url": "https://api.github.com/repos/octocat/linguist/teams",
+ "hooks_url": "https://api.github.com/repos/octocat/linguist/hooks",
+ "issue_events_url": "https://api.github.com/repos/octocat/linguist/issues/events{/number}",
+ "events_url": "https://api.github.com/repos/octocat/linguist/events",
+ "assignees_url": "https://api.github.com/repos/octocat/linguist/assignees{/user}",
+ "branches_url": "https://api.github.com/repos/octocat/linguist/branches{/branch}",
+ "tags_url": "https://api.github.com/repos/octocat/linguist/tags",
+ "blobs_url": "https://api.github.com/repos/octocat/linguist/git/blobs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/octocat/linguist/git/tags{/sha}",
+ "git_refs_url": "https://api.github.com/repos/octocat/linguist/git/refs{/sha}",
+ "trees_url": "https://api.github.com/repos/octocat/linguist/git/trees{/sha}",
+ "statuses_url": "https://api.github.com/repos/octocat/linguist/statuses/{sha}",
+ "languages_url": "https://api.github.com/repos/octocat/linguist/languages",
+ "stargazers_url": "https://api.github.com/repos/octocat/linguist/stargazers",
+ "contributors_url": "https://api.github.com/repos/octocat/linguist/contributors",
+ "subscribers_url": "https://api.github.com/repos/octocat/linguist/subscribers",
+ "subscription_url": "https://api.github.com/repos/octocat/linguist/subscription",
+ "commits_url": "https://api.github.com/repos/octocat/linguist/commits{/sha}",
+ "git_commits_url": "https://api.github.com/repos/octocat/linguist/git/commits{/sha}",
+ "comments_url": "https://api.github.com/repos/octocat/linguist/comments{/number}",
+ "issue_comment_url": "https://api.github.com/repos/octocat/linguist/issues/comments{/number}",
+ "contents_url": "https://api.github.com/repos/octocat/linguist/contents/{+path}",
+ "compare_url": "https://api.github.com/repos/octocat/linguist/compare/{base}...{head}",
+ "merges_url": "https://api.github.com/repos/octocat/linguist/merges",
+ "archive_url": "https://api.github.com/repos/octocat/linguist/{archive_format}{/ref}",
+ "downloads_url": "https://api.github.com/repos/octocat/linguist/downloads",
+ "issues_url": "https://api.github.com/repos/octocat/linguist/issues{/number}",
+ "pulls_url": "https://api.github.com/repos/octocat/linguist/pulls{/number}",
+ "milestones_url": "https://api.github.com/repos/octocat/linguist/milestones{/number}",
+ "notifications_url": "https://api.github.com/repos/octocat/linguist/notifications{?since,all,participating}",
+ "labels_url": "https://api.github.com/repos/octocat/linguist/labels{/name}",
+ "releases_url": "https://api.github.com/repos/octocat/linguist/releases{/id}",
+ "deployments_url": "https://api.github.com/repos/octocat/linguist/deployments",
+ "created_at": "2016-08-02T17:35:14Z",
+ "updated_at": "2022-04-23T17:19:19Z",
+ "pushed_at": "2022-04-26T09:28:02Z",
+ "git_url": "git://github.com/octocat/linguist.git",
+ "ssh_url": "git@github.com:octocat/linguist.git",
+ "clone_url": "https://github.com/octocat/linguist.git",
+ "svn_url": "https://github.com/octocat/linguist",
+ "homepage": "",
+ "size": 32899,
+ "stargazers_count": 120,
+ "watchers_count": 120,
+ "language": "Ruby",
+ "has_issues": false,
+ "has_projects": true,
+ "has_downloads": true,
+ "has_wiki": false,
+ "has_pages": false,
+ "forks_count": 129,
+ "mirror_url": null,
+ "archived": false,
+ "disabled": false,
+ "open_issues_count": 10,
+ "license": {
+ "key": "mit",
+ "name": "MIT License",
+ "spdx_id": "MIT",
+ "url": "https://api.github.com/licenses/mit",
+ "node_id": "MDc6TGljZW5zZTEz"
+ },
+ "allow_forking": true,
+ "is_template": false,
+ "topics": [
+
+ ],
+ "visibility": "public",
+ "forks": 129,
+ "open_issues": 10,
+ "watchers": 120,
+ "default_branch": "master",
+ "temp_clone_token": null,
+ "parent": {
+ "id": 1725199,
+ "node_id": "MDEwOlJlcG9zaXRvcnkxNzI1MTk5",
+ "name": "linguist",
+ "full_name": "github/linguist",
+ "private": false,
+ "owner": {
+ "login": "github",
+ "id": 9919,
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjk5MTk=",
+ "avatar_url": "https://avatars.githubusercontent.com/u/9919?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/github",
+ "html_url": "https://github.com/github",
+ "followers_url": "https://api.github.com/users/github/followers",
+ "following_url": "https://api.github.com/users/github/following{/other_user}",
+ "gists_url": "https://api.github.com/users/github/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/github/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/github/subscriptions",
+ "organizations_url": "https://api.github.com/users/github/orgs",
+ "repos_url": "https://api.github.com/users/github/repos",
+ "events_url": "https://api.github.com/users/github/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/github/received_events",
+ "type": "Organization",
+ "site_admin": false
+ },
+ "html_url": "https://github.com/github/linguist",
+ "description": "Language Savant. If your repository's language is being reported incorrectly, send us a pull request!",
+ "fork": false,
+ "url": "https://api.github.com/repos/github/linguist",
+ "forks_url": "https://api.github.com/repos/github/linguist/forks",
+ "keys_url": "https://api.github.com/repos/github/linguist/keys{/key_id}",
+ "collaborators_url": "https://api.github.com/repos/github/linguist/collaborators{/collaborator}",
+ "teams_url": "https://api.github.com/repos/github/linguist/teams",
+ "hooks_url": "https://api.github.com/repos/github/linguist/hooks",
+ "issue_events_url": "https://api.github.com/repos/github/linguist/issues/events{/number}",
+ "events_url": "https://api.github.com/repos/github/linguist/events",
+ "assignees_url": "https://api.github.com/repos/github/linguist/assignees{/user}",
+ "branches_url": "https://api.github.com/repos/github/linguist/branches{/branch}",
+ "tags_url": "https://api.github.com/repos/github/linguist/tags",
+ "blobs_url": "https://api.github.com/repos/github/linguist/git/blobs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/github/linguist/git/tags{/sha}",
+ "git_refs_url": "https://api.github.com/repos/github/linguist/git/refs{/sha}",
+ "trees_url": "https://api.github.com/repos/github/linguist/git/trees{/sha}",
+ "statuses_url": "https://api.github.com/repos/github/linguist/statuses/{sha}",
+ "languages_url": "https://api.github.com/repos/github/linguist/languages",
+ "stargazers_url": "https://api.github.com/repos/github/linguist/stargazers",
+ "contributors_url": "https://api.github.com/repos/github/linguist/contributors",
+ "subscribers_url": "https://api.github.com/repos/github/linguist/subscribers",
+ "subscription_url": "https://api.github.com/repos/github/linguist/subscription",
+ "commits_url": "https://api.github.com/repos/github/linguist/commits{/sha}",
+ "git_commits_url": "https://api.github.com/repos/github/linguist/git/commits{/sha}",
+ "comments_url": "https://api.github.com/repos/github/linguist/comments{/number}",
+ "issue_comment_url": "https://api.github.com/repos/github/linguist/issues/comments{/number}",
+ "contents_url": "https://api.github.com/repos/github/linguist/contents/{+path}",
+ "compare_url": "https://api.github.com/repos/github/linguist/compare/{base}...{head}",
+ "merges_url": "https://api.github.com/repos/github/linguist/merges",
+ "archive_url": "https://api.github.com/repos/github/linguist/{archive_format}{/ref}",
+ "downloads_url": "https://api.github.com/repos/github/linguist/downloads",
+ "issues_url": "https://api.github.com/repos/github/linguist/issues{/number}",
+ "pulls_url": "https://api.github.com/repos/github/linguist/pulls{/number}",
+ "milestones_url": "https://api.github.com/repos/github/linguist/milestones{/number}",
+ "notifications_url": "https://api.github.com/repos/github/linguist/notifications{?since,all,participating}",
+ "labels_url": "https://api.github.com/repos/github/linguist/labels{/name}",
+ "releases_url": "https://api.github.com/repos/github/linguist/releases{/id}",
+ "deployments_url": "https://api.github.com/repos/github/linguist/deployments",
+ "created_at": "2011-05-09T22:53:13Z",
+ "updated_at": "2022-04-26T08:44:55Z",
+ "pushed_at": "2022-04-26T10:27:23Z",
+ "git_url": "git://github.com/github/linguist.git",
+ "ssh_url": "git@github.com:github/linguist.git",
+ "clone_url": "https://github.com/github/linguist.git",
+ "svn_url": "https://github.com/github/linguist",
+ "homepage": "",
+ "size": 38000,
+ "stargazers_count": 9751,
+ "watchers_count": 9751,
+ "language": "Ruby",
+ "has_issues": true,
+ "has_projects": false,
+ "has_downloads": true,
+ "has_wiki": false,
+ "has_pages": false,
+ "forks_count": 3539,
+ "mirror_url": null,
+ "archived": false,
+ "disabled": false,
+ "open_issues_count": 107,
+ "license": {
+ "key": "mit",
+ "name": "MIT License",
+ "spdx_id": "MIT",
+ "url": "https://api.github.com/licenses/mit",
+ "node_id": "MDc6TGljZW5zZTEz"
+ },
+ "allow_forking": true,
+ "is_template": false,
+ "topics": [
+ "language-grammars",
+ "language-statistics",
+ "linguistic",
+ "syntax-highlighting"
+ ],
+ "visibility": "public",
+ "forks": 3539,
+ "open_issues": 107,
+ "watchers": 9751,
+ "default_branch": "master"
+ },
+ "source": {
+ "id": 1725199,
+ "node_id": "MDEwOlJlcG9zaXRvcnkxNzI1MTk5",
+ "name": "linguist",
+ "full_name": "github/linguist",
+ "private": false,
+ "owner": {
+ "login": "github",
+ "id": 9919,
+ "node_id": "MDEyOk9yZ2FuaXphdGlvbjk5MTk=",
+ "avatar_url": "https://avatars.githubusercontent.com/u/9919?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/github",
+ "html_url": "https://github.com/github",
+ "followers_url": "https://api.github.com/users/github/followers",
+ "following_url": "https://api.github.com/users/github/following{/other_user}",
+ "gists_url": "https://api.github.com/users/github/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/github/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/github/subscriptions",
+ "organizations_url": "https://api.github.com/users/github/orgs",
+ "repos_url": "https://api.github.com/users/github/repos",
+ "events_url": "https://api.github.com/users/github/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/github/received_events",
+ "type": "Organization",
+ "site_admin": false
+ },
+ "html_url": "https://github.com/github/linguist",
+ "description": "Language Savant. If your repository's language is being reported incorrectly, send us a pull request!",
+ "fork": false,
+ "url": "https://api.github.com/repos/github/linguist",
+ "forks_url": "https://api.github.com/repos/github/linguist/forks",
+ "keys_url": "https://api.github.com/repos/github/linguist/keys{/key_id}",
+ "collaborators_url": "https://api.github.com/repos/github/linguist/collaborators{/collaborator}",
+ "teams_url": "https://api.github.com/repos/github/linguist/teams",
+ "hooks_url": "https://api.github.com/repos/github/linguist/hooks",
+ "issue_events_url": "https://api.github.com/repos/github/linguist/issues/events{/number}",
+ "events_url": "https://api.github.com/repos/github/linguist/events",
+ "assignees_url": "https://api.github.com/repos/github/linguist/assignees{/user}",
+ "branches_url": "https://api.github.com/repos/github/linguist/branches{/branch}",
+ "tags_url": "https://api.github.com/repos/github/linguist/tags",
+ "blobs_url": "https://api.github.com/repos/github/linguist/git/blobs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/github/linguist/git/tags{/sha}",
+ "git_refs_url": "https://api.github.com/repos/github/linguist/git/refs{/sha}",
+ "trees_url": "https://api.github.com/repos/github/linguist/git/trees{/sha}",
+ "statuses_url": "https://api.github.com/repos/github/linguist/statuses/{sha}",
+ "languages_url": "https://api.github.com/repos/github/linguist/languages",
+ "stargazers_url": "https://api.github.com/repos/github/linguist/stargazers",
+ "contributors_url": "https://api.github.com/repos/github/linguist/contributors",
+ "subscribers_url": "https://api.github.com/repos/github/linguist/subscribers",
+ "subscription_url": "https://api.github.com/repos/github/linguist/subscription",
+ "commits_url": "https://api.github.com/repos/github/linguist/commits{/sha}",
+ "git_commits_url": "https://api.github.com/repos/github/linguist/git/commits{/sha}",
+ "comments_url": "https://api.github.com/repos/github/linguist/comments{/number}",
+ "issue_comment_url": "https://api.github.com/repos/github/linguist/issues/comments{/number}",
+ "contents_url": "https://api.github.com/repos/github/linguist/contents/{+path}",
+ "compare_url": "https://api.github.com/repos/github/linguist/compare/{base}...{head}",
+ "merges_url": "https://api.github.com/repos/github/linguist/merges",
+ "archive_url": "https://api.github.com/repos/github/linguist/{archive_format}{/ref}",
+ "downloads_url": "https://api.github.com/repos/github/linguist/downloads",
+ "issues_url": "https://api.github.com/repos/github/linguist/issues{/number}",
+ "pulls_url": "https://api.github.com/repos/github/linguist/pulls{/number}",
+ "milestones_url": "https://api.github.com/repos/github/linguist/milestones{/number}",
+ "notifications_url": "https://api.github.com/repos/github/linguist/notifications{?since,all,participating}",
+ "labels_url": "https://api.github.com/repos/github/linguist/labels{/name}",
+ "releases_url": "https://api.github.com/repos/github/linguist/releases{/id}",
+ "deployments_url": "https://api.github.com/repos/github/linguist/deployments",
+ "created_at": "2011-05-09T22:53:13Z",
+ "updated_at": "2022-04-26T08:44:55Z",
+ "pushed_at": "2022-04-26T10:27:23Z",
+ "git_url": "git://github.com/github/linguist.git",
+ "ssh_url": "git@github.com:github/linguist.git",
+ "clone_url": "https://github.com/github/linguist.git",
+ "svn_url": "https://github.com/github/linguist",
+ "homepage": "",
+ "size": 38000,
+ "stargazers_count": 9751,
+ "watchers_count": 9751,
+ "language": "Ruby",
+ "has_issues": true,
+ "has_projects": false,
+ "has_downloads": true,
+ "has_wiki": false,
+ "has_pages": false,
+ "forks_count": 3539,
+ "mirror_url": null,
+ "archived": false,
+ "disabled": false,
+ "open_issues_count": 107,
+ "license": {
+ "key": "mit",
+ "name": "MIT License",
+ "spdx_id": "MIT",
+ "url": "https://api.github.com/licenses/mit",
+ "node_id": "MDc6TGljZW5zZTEz"
+ },
+ "allow_forking": true,
+ "is_template": false,
+ "topics": [
+ "language-grammars",
+ "language-statistics",
+ "linguistic",
+ "syntax-highlighting"
+ ],
+ "visibility": "public",
+ "forks": 3539,
+ "open_issues": 107,
+ "watchers": 9751,
+ "default_branch": "master"
+ },
+ "network_count": 3539,
+ "subscribers_count": 29
+}
diff --git a/swh/loader/metadata/tests/test_github.py b/swh/loader/metadata/tests/test_github.py
--- a/swh/loader/metadata/tests/test_github.py
+++ b/swh/loader/metadata/tests/test_github.py
@@ -23,6 +23,7 @@
from .test_base import DummyLoader
ORIGIN = Origin("https://github.com/octocat/Hello-World")
+FORKED_ORIGIN = Origin("https://github.com/octocat/linguist")
METADATA_AUTHORITY = MetadataAuthority(
type=MetadataAuthorityType.FORGE, url="https://github.com"
@@ -68,6 +69,20 @@
)
assert fetcher.get_origin_metadata() == [expected_metadata(now, datadir)]
+ assert fetcher.get_parent_origin() is None
+
+
+def test_github_metadata_fork(datadir, requests_mock_datadir, mocker):
+    """The parent of the forked ``octocat/linguist`` is ``github/linguist``."""
+    # Pin the ``now`` looked up in swh.loader.metadata.base to a fixed value.
+    frozen_now = datetime.datetime.now(tz=datetime.timezone.utc)
+    mocker.patch("swh.loader.metadata.base.now", return_value=frozen_now)
+
+    parent = GitHubMetadataFetcher(
+        FORKED_ORIGIN, credentials=None, lister_name="github", lister_instance_name=""
+    ).get_parent_origin()
+
+    assert parent == Origin(url="https://github.com/github/linguist")
def test_github_metadata_from_loader(

File Metadata

Mime Type
text/plain
Expires
Dec 19 2024, 11:49 AM (4 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227545

Event Timeline