class BaseMetadataFetcher:
    """Base class for Software Heritage metadata fetchers.

    Fetchers are hooks used by loaders to retrieve extrinsic metadata from
    forges before archiving repositories.

    Each fetcher handles a specific type of forge (not VCS); each fetcher
    class generally matches a lister class, as they use the same APIs.

    Subclasses must define :attr:`FETCHER_NAME` and implement
    :meth:`_check_origin` and :meth:`_get_origin_metadata_bytes`; they may
    implement :meth:`get_parent_origins`.

    Args:
      origin: the origin to retrieve metadata from
      credentials: This is the same format as for
        :class:`swh.lister.pattern.Lister`: dictionary of credentials for all
        fetchers. The first level identifies the fetcher's name, the second
        level the lister instance. The final level is a list of dicts
        containing the expected credentials for the given instance of that
        fetcher.
      lister_name: first-level key used to look up ``credentials``; it must
        currently be equal to :attr:`FETCHER_NAME`
      lister_instance_name: second-level key used to look up ``credentials``

    Raises:
      NotImplementedError: if the subclass does not define
        :attr:`FETCHER_NAME` or does not implement :meth:`_check_origin`
      InvalidOrigin: (from :meth:`_check_origin`) if the origin is not
        supported by this fetcher
    """

    FETCHER_NAME: str
    """The config-friendly name of this fetcher, used to retrieve the first
    level of credentials."""

    SUPPORTED_LISTERS: Set[str]
    """Set of forge types this metadata fetcher supports. The type names are the same
    as the names used by listers themselves.

    Generally, fetchers have a one-to-one matching with listers, in which case
    this is set of ``{FETCHER_NAME}``.
    """

    def __init__(
        self,
        origin: Origin,
        credentials: CredentialsType,
        lister_name: str,
        lister_instance_name: str,
    ):
        # FETCHER_NAME is only *annotated* on this base class, so a subclass
        # that forgets to define it has no such attribute at all; use getattr
        # so that the intended NotImplementedError is raised instead of an
        # AttributeError.
        if getattr(self, "FETCHER_NAME", None) is None:
            raise NotImplementedError(f"{self.__class__.__name__}.FETCHER_NAME")
        self.origin = origin
        self._check_origin()
        # Cache filled by get_origin_metadata(); None means "not fetched yet".
        self._origin_metadata_objects: Optional[List[RawExtrinsicMetadata]] = None
        # Created on first call to session().
        self._session: Optional[requests.Session] = None

        # Both names do not *have* to match, but they all do for now.
        assert lister_name == self.FETCHER_NAME

        # Copy the list so later mutations do not leak into the caller's dict.
        self.credentials = list(
            (credentials or {}).get(lister_name, {}).get(lister_instance_name, [])
        )

    def _make_session(self) -> requests.Session:
        """Build a new HTTP session whose ``User-Agent`` header identifies
        this fetcher (name and version)."""
        session = requests.Session()
        fetcher = self._metadata_fetcher()
        user_agent = (
            f"Software Heritage Metadata Fetcher ({fetcher.name} {fetcher.version})"
        )
        session.headers["User-Agent"] = user_agent
        return session

    def session(self) -> requests.Session:
        """Return the HTTP session of this fetcher, creating it on first use."""
        if self._session is None:
            self._session = self._make_session()
        return self._session

    def _check_origin(self) -> bool:
        """Raise :exc:`InvalidOrigin` if the origin does not belong to the supported
        forge types of this fetcher."""
        raise NotImplementedError(f"{self.__class__.__name__}._check_origin")

    def _get_origin_metadata_bytes(self) -> List[Tuple[str, bytes]]:
        """Return pairs of ``(format, metadata)``, used to build
        :class:`swh.model.model.RawExtrinsicMetadata` objects."""
        raise NotImplementedError(
            f"{self.__class__.__name__}._get_origin_metadata_bytes"
        )

    def _metadata_authority(self) -> MetadataAuthority:
        """Return information about the metadata authority that issued
        metadata we extract from the given origin"""
        (scheme, netloc, *_) = urllib.parse.urlsplit(self.origin.url)

        assert scheme and netloc, self.origin.url

        # A good default that should work for most, if not all, forges
        forge_url = urllib.parse.urlunsplit(("https", netloc, "", "", ""))
        return MetadataAuthority(
            url=forge_url,
            type=MetadataAuthorityType.FORGE,
        )

    @classmethod
    def _get_package_version(cls) -> str:
        """Return the version of the package this fetcher class belongs to.

        Raises:
          NotImplementedError: if no module on the dotted path of the class'
            module exposes a ``__version__``
        """
        module_name = cls.__module__ or ""
        module_name_parts = module_name.split(".")

        # Iterate rootward through the package hierarchy until we find a parent of this
        # fetcher's module with a __version__ attribute.
        for prefix_size in range(len(module_name_parts), 0, -1):
            package_name = ".".join(module_name_parts[:prefix_size])
            # .get(): a prefix missing from sys.modules must fall through to
            # the NotImplementedError below rather than raise a KeyError.
            module = sys.modules.get(package_name)
            version = getattr(module, "__version__", None)
            if version is not None:
                return version

        # If this fetcher's class has no parent package with a __version__,
        # it should implement it itself.
        raise NotImplementedError(
            f"Could not dynamically find the version of {module_name}."
        )

    @classmethod
    def _metadata_fetcher(cls) -> MetadataFetcher:
        """Return information about this metadata fetcher"""
        return MetadataFetcher(
            name=cls.__module__,
            version=cls._get_package_version(),
        )

    def get_origin_metadata(self) -> List[RawExtrinsicMetadata]:
        """Return a list of metadata objects for the given origin.

        The result is computed on the first successful call and cached; the
        same list object is returned by subsequent calls.
        """
        if self._origin_metadata_objects is None:
            # Build the list fully before storing it: if fetching raises, the
            # cache stays unset and the next call retries, instead of
            # silently returning an empty list.
            self._origin_metadata_objects = [
                RawExtrinsicMetadata(
                    target=self.origin.swhid(),
                    discovery_date=now(),
                    authority=self._metadata_authority(),
                    fetcher=self._metadata_fetcher(),
                    format=format_,
                    metadata=metadata_bytes,
                )
                for (format_, metadata_bytes) in self._get_origin_metadata_bytes()
            ]

        return self._origin_metadata_objects

    def get_parent_origins(self) -> List[Origin]:
        """If the given origin is a "forge fork" (ie. created with the "Fork" button
        of GitHub-like forges), returns a list of origins it was forked from;
        closest parent first."""
        raise NotImplementedError(f"{self.__class__.__name__}.get_parent_origins")
class GitHubMetadataFetcher(BaseMetadataFetcher):
    """Metadata fetcher for origins hosted on ``github.com``.

    It queries the GitHub REST API "repos" endpoint for the origin and
    archives the raw response body as extrinsic metadata.
    """

    FETCHER_NAME = "github"
    SUPPORTED_LISTERS = {"github"}

    # Created on first call to github_session().
    _github_session: Optional[GitHubSession] = None

    def github_session(self) -> GitHubSession:
        """Return the GitHub API session of this fetcher, creating it with
        this fetcher's credentials on first use."""
        if self._github_session is None:
            self._github_session = GitHubSession(
                user_agent=USER_AGENT, credentials=self.credentials
            )
        return self._github_session

    def _check_origin(self):
        """Raise :exc:`InvalidOrigin` unless the origin URL looks like
        ``https://github.com/<owner>/<repo>`` with no query or fragment."""
        (scheme, netloc, path, query, fragment) = urllib.parse.urlsplit(self.origin.url)
        if netloc != "github.com":
            # TODO: relax this check when we support self-hosted GitHub instances
            raise InvalidOrigin(f"netloc should be 'github.com', not '{netloc}'")

        # NOTE(review): re.match only anchors at the start, so paths with
        # extra trailing segments are accepted here -- confirm this is wanted.
        if scheme != "https" or not re.match(r"/[^\s/]+/[^\s/]+", path):
            raise InvalidOrigin(f"Unsupported github.com URL: {self.origin.url}")

        if query != "" or fragment != "":
            raise InvalidOrigin(
                f"Unexpected end query or fragment in github.com URL: {self.origin.url}"
            )

    def _get_origin_metadata_bytes(self) -> List[Tuple[str, bytes]]:
        """Fetch the repository description from the GitHub API.

        Returns:
          a single ``(METADATA_FORMAT, body)`` pair holding the verbatim
          response body, or an empty list if the API did not answer with
          HTTP 200.
        """
        path = urllib.parse.urlsplit(self.origin.url).path
        response = self.github_session().request(_API_URL.format(path=path))
        if response.status_code != 200:
            # TODO: retry
            return []

        metadata_bytes = response.content

        # TODO?: strip API hyperlinks from metadata_bytes to save space?
        # They take 10KB for every repo, or 1KB when compressed by the database server.
        # This means processing metadata_bytes and changing the format, instead of
        # archiving verbatim, though.

        return [(METADATA_FORMAT, metadata_bytes)]

    def get_parent_origins(self) -> List[Origin]:
        """Return the origins this repository was forked from, closest first.

        The ``parent`` entry of the API response comes first, followed by the
        ``source`` entry when it designates a different repository. The list
        is empty when neither entry is present.
        """
        parents = []
        for metadata in self.get_origin_metadata():
            if metadata.format != METADATA_FORMAT:
                continue
            data = json.loads(metadata.metadata)
            parent = data.get("parent")
            source = data.get("source")

            # Resolve the parent URL once; it stays None when there is no
            # parent, so the comparison below never subscripts None.
            parent_url = None if parent is None else parent["html_url"]
            if parent_url is not None:
                parents.append(Origin(url=parent_url))
            if source is not None and source["html_url"] != parent_url:
                parents.append(Origin(url=source["html_url"]))

        return parents
"https://github.com/jmarlena/linguist", + "description": "Language Savant. If your repository's language is being reported incorrectly, send us a pull request!", + "fork": true, + "url": "https://api.github.com/repos/jmarlena/linguist", + "forks_url": "https://api.github.com/repos/jmarlena/linguist/forks", + "keys_url": "https://api.github.com/repos/jmarlena/linguist/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/jmarlena/linguist/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/jmarlena/linguist/teams", + "hooks_url": "https://api.github.com/repos/jmarlena/linguist/hooks", + "issue_events_url": "https://api.github.com/repos/jmarlena/linguist/issues/events{/number}", + "events_url": "https://api.github.com/repos/jmarlena/linguist/events", + "assignees_url": "https://api.github.com/repos/jmarlena/linguist/assignees{/user}", + "branches_url": "https://api.github.com/repos/jmarlena/linguist/branches{/branch}", + "tags_url": "https://api.github.com/repos/jmarlena/linguist/tags", + "blobs_url": "https://api.github.com/repos/jmarlena/linguist/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/jmarlena/linguist/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/jmarlena/linguist/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/jmarlena/linguist/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/jmarlena/linguist/statuses/{sha}", + "languages_url": "https://api.github.com/repos/jmarlena/linguist/languages", + "stargazers_url": "https://api.github.com/repos/jmarlena/linguist/stargazers", + "contributors_url": "https://api.github.com/repos/jmarlena/linguist/contributors", + "subscribers_url": "https://api.github.com/repos/jmarlena/linguist/subscribers", + "subscription_url": "https://api.github.com/repos/jmarlena/linguist/subscription", + "commits_url": "https://api.github.com/repos/jmarlena/linguist/commits{/sha}", + "git_commits_url": 
"https://api.github.com/repos/jmarlena/linguist/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/jmarlena/linguist/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/jmarlena/linguist/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/jmarlena/linguist/contents/{+path}", + "compare_url": "https://api.github.com/repos/jmarlena/linguist/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/jmarlena/linguist/merges", + "archive_url": "https://api.github.com/repos/jmarlena/linguist/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/jmarlena/linguist/downloads", + "issues_url": "https://api.github.com/repos/jmarlena/linguist/issues{/number}", + "pulls_url": "https://api.github.com/repos/jmarlena/linguist/pulls{/number}", + "milestones_url": "https://api.github.com/repos/jmarlena/linguist/milestones{/number}", + "notifications_url": "https://api.github.com/repos/jmarlena/linguist/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/jmarlena/linguist/labels{/name}", + "releases_url": "https://api.github.com/repos/jmarlena/linguist/releases{/id}", + "deployments_url": "https://api.github.com/repos/jmarlena/linguist/deployments", + "created_at": "2016-08-02T18:10:46Z", + "updated_at": "2021-04-25T02:01:40Z", + "pushed_at": "2016-08-02T19:05:42Z", + "git_url": "git://github.com/jmarlena/linguist.git", + "ssh_url": "git@github.com:jmarlena/linguist.git", + "clone_url": "https://github.com/jmarlena/linguist.git", + "svn_url": "https://github.com/jmarlena/linguist", + "homepage": "", + "size": 32899, + "stargazers_count": 0, + "watchers_count": 0, + "language": "Ruby", + "has_issues": false, + "has_projects": true, + "has_downloads": true, + "has_wiki": false, + "has_pages": false, + "forks_count": 1, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 0, + "license": { + "key": "mit", + "name": "MIT 
License", + "spdx_id": "MIT", + "url": "https://api.github.com/licenses/mit", + "node_id": "MDc6TGljZW5zZTEz" + }, + "allow_forking": true, + "is_template": false, + "topics": [ + + ], + "visibility": "public", + "forks": 1, + "open_issues": 0, + "watchers": 0, + "default_branch": "master", + "temp_clone_token": null, + "parent": { + "id": 64778136, + "node_id": "MDEwOlJlcG9zaXRvcnk2NDc3ODEzNg==", + "name": "linguist", + "full_name": "octocat/linguist", + "private": false, + "owner": { + "login": "octocat", + "id": 583231, + "node_id": "MDQ6VXNlcjU4MzIzMQ==", + "avatar_url": "https://avatars.githubusercontent.com/u/583231?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/octocat", + "html_url": "https://github.com/octocat", + "followers_url": "https://api.github.com/users/octocat/followers", + "following_url": "https://api.github.com/users/octocat/following{/other_user}", + "gists_url": "https://api.github.com/users/octocat/gists{/gist_id}", + "starred_url": "https://api.github.com/users/octocat/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/octocat/subscriptions", + "organizations_url": "https://api.github.com/users/octocat/orgs", + "repos_url": "https://api.github.com/users/octocat/repos", + "events_url": "https://api.github.com/users/octocat/events{/privacy}", + "received_events_url": "https://api.github.com/users/octocat/received_events", + "type": "User", + "site_admin": false + }, + "html_url": "https://github.com/octocat/linguist", + "description": "Language Savant. 
If your repository's language is being reported incorrectly, send us a pull request!", + "fork": true, + "url": "https://api.github.com/repos/octocat/linguist", + "forks_url": "https://api.github.com/repos/octocat/linguist/forks", + "keys_url": "https://api.github.com/repos/octocat/linguist/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/octocat/linguist/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/octocat/linguist/teams", + "hooks_url": "https://api.github.com/repos/octocat/linguist/hooks", + "issue_events_url": "https://api.github.com/repos/octocat/linguist/issues/events{/number}", + "events_url": "https://api.github.com/repos/octocat/linguist/events", + "assignees_url": "https://api.github.com/repos/octocat/linguist/assignees{/user}", + "branches_url": "https://api.github.com/repos/octocat/linguist/branches{/branch}", + "tags_url": "https://api.github.com/repos/octocat/linguist/tags", + "blobs_url": "https://api.github.com/repos/octocat/linguist/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/octocat/linguist/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/octocat/linguist/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/octocat/linguist/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/octocat/linguist/statuses/{sha}", + "languages_url": "https://api.github.com/repos/octocat/linguist/languages", + "stargazers_url": "https://api.github.com/repos/octocat/linguist/stargazers", + "contributors_url": "https://api.github.com/repos/octocat/linguist/contributors", + "subscribers_url": "https://api.github.com/repos/octocat/linguist/subscribers", + "subscription_url": "https://api.github.com/repos/octocat/linguist/subscription", + "commits_url": "https://api.github.com/repos/octocat/linguist/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/octocat/linguist/git/commits{/sha}", + "comments_url": 
"https://api.github.com/repos/octocat/linguist/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/octocat/linguist/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/octocat/linguist/contents/{+path}", + "compare_url": "https://api.github.com/repos/octocat/linguist/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/octocat/linguist/merges", + "archive_url": "https://api.github.com/repos/octocat/linguist/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/octocat/linguist/downloads", + "issues_url": "https://api.github.com/repos/octocat/linguist/issues{/number}", + "pulls_url": "https://api.github.com/repos/octocat/linguist/pulls{/number}", + "milestones_url": "https://api.github.com/repos/octocat/linguist/milestones{/number}", + "notifications_url": "https://api.github.com/repos/octocat/linguist/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/octocat/linguist/labels{/name}", + "releases_url": "https://api.github.com/repos/octocat/linguist/releases{/id}", + "deployments_url": "https://api.github.com/repos/octocat/linguist/deployments", + "created_at": "2016-08-02T17:35:14Z", + "updated_at": "2022-04-26T12:31:02Z", + "pushed_at": "2022-04-26T09:28:02Z", + "git_url": "git://github.com/octocat/linguist.git", + "ssh_url": "git@github.com:octocat/linguist.git", + "clone_url": "https://github.com/octocat/linguist.git", + "svn_url": "https://github.com/octocat/linguist", + "homepage": "", + "size": 32899, + "stargazers_count": 119, + "watchers_count": 119, + "language": "Ruby", + "has_issues": false, + "has_projects": true, + "has_downloads": true, + "has_wiki": false, + "has_pages": false, + "forks_count": 129, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 10, + "license": { + "key": "mit", + "name": "MIT License", + "spdx_id": "MIT", + "url": "https://api.github.com/licenses/mit", + "node_id": 
"MDc6TGljZW5zZTEz" + }, + "allow_forking": true, + "is_template": false, + "topics": [ + + ], + "visibility": "public", + "forks": 129, + "open_issues": 10, + "watchers": 119, + "default_branch": "master" + }, + "source": { + "id": 1725199, + "node_id": "MDEwOlJlcG9zaXRvcnkxNzI1MTk5", + "name": "linguist", + "full_name": "github/linguist", + "private": false, + "owner": { + "login": "github", + "id": 9919, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjk5MTk=", + "avatar_url": "https://avatars.githubusercontent.com/u/9919?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/github", + "html_url": "https://github.com/github", + "followers_url": "https://api.github.com/users/github/followers", + "following_url": "https://api.github.com/users/github/following{/other_user}", + "gists_url": "https://api.github.com/users/github/gists{/gist_id}", + "starred_url": "https://api.github.com/users/github/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/github/subscriptions", + "organizations_url": "https://api.github.com/users/github/orgs", + "repos_url": "https://api.github.com/users/github/repos", + "events_url": "https://api.github.com/users/github/events{/privacy}", + "received_events_url": "https://api.github.com/users/github/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/github/linguist", + "description": "Language Savant. 
If your repository's language is being reported incorrectly, send us a pull request!", + "fork": false, + "url": "https://api.github.com/repos/github/linguist", + "forks_url": "https://api.github.com/repos/github/linguist/forks", + "keys_url": "https://api.github.com/repos/github/linguist/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/github/linguist/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/github/linguist/teams", + "hooks_url": "https://api.github.com/repos/github/linguist/hooks", + "issue_events_url": "https://api.github.com/repos/github/linguist/issues/events{/number}", + "events_url": "https://api.github.com/repos/github/linguist/events", + "assignees_url": "https://api.github.com/repos/github/linguist/assignees{/user}", + "branches_url": "https://api.github.com/repos/github/linguist/branches{/branch}", + "tags_url": "https://api.github.com/repos/github/linguist/tags", + "blobs_url": "https://api.github.com/repos/github/linguist/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/github/linguist/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/github/linguist/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/github/linguist/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/github/linguist/statuses/{sha}", + "languages_url": "https://api.github.com/repos/github/linguist/languages", + "stargazers_url": "https://api.github.com/repos/github/linguist/stargazers", + "contributors_url": "https://api.github.com/repos/github/linguist/contributors", + "subscribers_url": "https://api.github.com/repos/github/linguist/subscribers", + "subscription_url": "https://api.github.com/repos/github/linguist/subscription", + "commits_url": "https://api.github.com/repos/github/linguist/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/github/linguist/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/github/linguist/comments{/number}", + 
"issue_comment_url": "https://api.github.com/repos/github/linguist/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/github/linguist/contents/{+path}", + "compare_url": "https://api.github.com/repos/github/linguist/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/github/linguist/merges", + "archive_url": "https://api.github.com/repos/github/linguist/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/github/linguist/downloads", + "issues_url": "https://api.github.com/repos/github/linguist/issues{/number}", + "pulls_url": "https://api.github.com/repos/github/linguist/pulls{/number}", + "milestones_url": "https://api.github.com/repos/github/linguist/milestones{/number}", + "notifications_url": "https://api.github.com/repos/github/linguist/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/github/linguist/labels{/name}", + "releases_url": "https://api.github.com/repos/github/linguist/releases{/id}", + "deployments_url": "https://api.github.com/repos/github/linguist/deployments", + "created_at": "2011-05-09T22:53:13Z", + "updated_at": "2022-04-26T13:05:32Z", + "pushed_at": "2022-04-26T10:27:23Z", + "git_url": "git://github.com/github/linguist.git", + "ssh_url": "git@github.com:github/linguist.git", + "clone_url": "https://github.com/github/linguist.git", + "svn_url": "https://github.com/github/linguist", + "homepage": "", + "size": 38000, + "stargazers_count": 9750, + "watchers_count": 9750, + "language": "Ruby", + "has_issues": true, + "has_projects": false, + "has_downloads": true, + "has_wiki": false, + "has_pages": false, + "forks_count": 3538, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 106, + "license": { + "key": "mit", + "name": "MIT License", + "spdx_id": "MIT", + "url": "https://api.github.com/licenses/mit", + "node_id": "MDc6TGljZW5zZTEz" + }, + "allow_forking": true, + "is_template": false, + "topics": [ + 
"language-grammars", + "language-statistics", + "linguistic", + "syntax-highlighting" + ], + "visibility": "public", + "forks": 3538, + "open_issues": 106, + "watchers": 9750, + "default_branch": "master" + }, + "network_count": 3538, + "subscribers_count": 1 +} diff --git a/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_linguist b/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_linguist new file mode 100644 index 0000000..10a5809 --- /dev/null +++ b/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_linguist @@ -0,0 +1,327 @@ +{ + "id": 64778136, + "node_id": "MDEwOlJlcG9zaXRvcnk2NDc3ODEzNg==", + "name": "linguist", + "full_name": "octocat/linguist", + "private": false, + "owner": { + "login": "octocat", + "id": 583231, + "node_id": "MDQ6VXNlcjU4MzIzMQ==", + "avatar_url": "https://avatars.githubusercontent.com/u/583231?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/octocat", + "html_url": "https://github.com/octocat", + "followers_url": "https://api.github.com/users/octocat/followers", + "following_url": "https://api.github.com/users/octocat/following{/other_user}", + "gists_url": "https://api.github.com/users/octocat/gists{/gist_id}", + "starred_url": "https://api.github.com/users/octocat/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/octocat/subscriptions", + "organizations_url": "https://api.github.com/users/octocat/orgs", + "repos_url": "https://api.github.com/users/octocat/repos", + "events_url": "https://api.github.com/users/octocat/events{/privacy}", + "received_events_url": "https://api.github.com/users/octocat/received_events", + "type": "User", + "site_admin": false + }, + "html_url": "https://github.com/octocat/linguist", + "description": "Language Savant. 
If your repository's language is being reported incorrectly, send us a pull request!", + "fork": true, + "url": "https://api.github.com/repos/octocat/linguist", + "forks_url": "https://api.github.com/repos/octocat/linguist/forks", + "keys_url": "https://api.github.com/repos/octocat/linguist/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/octocat/linguist/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/octocat/linguist/teams", + "hooks_url": "https://api.github.com/repos/octocat/linguist/hooks", + "issue_events_url": "https://api.github.com/repos/octocat/linguist/issues/events{/number}", + "events_url": "https://api.github.com/repos/octocat/linguist/events", + "assignees_url": "https://api.github.com/repos/octocat/linguist/assignees{/user}", + "branches_url": "https://api.github.com/repos/octocat/linguist/branches{/branch}", + "tags_url": "https://api.github.com/repos/octocat/linguist/tags", + "blobs_url": "https://api.github.com/repos/octocat/linguist/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/octocat/linguist/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/octocat/linguist/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/octocat/linguist/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/octocat/linguist/statuses/{sha}", + "languages_url": "https://api.github.com/repos/octocat/linguist/languages", + "stargazers_url": "https://api.github.com/repos/octocat/linguist/stargazers", + "contributors_url": "https://api.github.com/repos/octocat/linguist/contributors", + "subscribers_url": "https://api.github.com/repos/octocat/linguist/subscribers", + "subscription_url": "https://api.github.com/repos/octocat/linguist/subscription", + "commits_url": "https://api.github.com/repos/octocat/linguist/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/octocat/linguist/git/commits{/sha}", + "comments_url": 
"https://api.github.com/repos/octocat/linguist/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/octocat/linguist/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/octocat/linguist/contents/{+path}", + "compare_url": "https://api.github.com/repos/octocat/linguist/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/octocat/linguist/merges", + "archive_url": "https://api.github.com/repos/octocat/linguist/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/octocat/linguist/downloads", + "issues_url": "https://api.github.com/repos/octocat/linguist/issues{/number}", + "pulls_url": "https://api.github.com/repos/octocat/linguist/pulls{/number}", + "milestones_url": "https://api.github.com/repos/octocat/linguist/milestones{/number}", + "notifications_url": "https://api.github.com/repos/octocat/linguist/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/octocat/linguist/labels{/name}", + "releases_url": "https://api.github.com/repos/octocat/linguist/releases{/id}", + "deployments_url": "https://api.github.com/repos/octocat/linguist/deployments", + "created_at": "2016-08-02T17:35:14Z", + "updated_at": "2022-04-23T17:19:19Z", + "pushed_at": "2022-04-26T09:28:02Z", + "git_url": "git://github.com/octocat/linguist.git", + "ssh_url": "git@github.com:octocat/linguist.git", + "clone_url": "https://github.com/octocat/linguist.git", + "svn_url": "https://github.com/octocat/linguist", + "homepage": "", + "size": 32899, + "stargazers_count": 120, + "watchers_count": 120, + "language": "Ruby", + "has_issues": false, + "has_projects": true, + "has_downloads": true, + "has_wiki": false, + "has_pages": false, + "forks_count": 129, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 10, + "license": { + "key": "mit", + "name": "MIT License", + "spdx_id": "MIT", + "url": "https://api.github.com/licenses/mit", + "node_id": 
"MDc6TGljZW5zZTEz" + }, + "allow_forking": true, + "is_template": false, + "topics": [ + + ], + "visibility": "public", + "forks": 129, + "open_issues": 10, + "watchers": 120, + "default_branch": "master", + "temp_clone_token": null, + "parent": { + "id": 1725199, + "node_id": "MDEwOlJlcG9zaXRvcnkxNzI1MTk5", + "name": "linguist", + "full_name": "github/linguist", + "private": false, + "owner": { + "login": "github", + "id": 9919, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjk5MTk=", + "avatar_url": "https://avatars.githubusercontent.com/u/9919?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/github", + "html_url": "https://github.com/github", + "followers_url": "https://api.github.com/users/github/followers", + "following_url": "https://api.github.com/users/github/following{/other_user}", + "gists_url": "https://api.github.com/users/github/gists{/gist_id}", + "starred_url": "https://api.github.com/users/github/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/github/subscriptions", + "organizations_url": "https://api.github.com/users/github/orgs", + "repos_url": "https://api.github.com/users/github/repos", + "events_url": "https://api.github.com/users/github/events{/privacy}", + "received_events_url": "https://api.github.com/users/github/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/github/linguist", + "description": "Language Savant. 
If your repository's language is being reported incorrectly, send us a pull request!", + "fork": false, + "url": "https://api.github.com/repos/github/linguist", + "forks_url": "https://api.github.com/repos/github/linguist/forks", + "keys_url": "https://api.github.com/repos/github/linguist/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/github/linguist/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/github/linguist/teams", + "hooks_url": "https://api.github.com/repos/github/linguist/hooks", + "issue_events_url": "https://api.github.com/repos/github/linguist/issues/events{/number}", + "events_url": "https://api.github.com/repos/github/linguist/events", + "assignees_url": "https://api.github.com/repos/github/linguist/assignees{/user}", + "branches_url": "https://api.github.com/repos/github/linguist/branches{/branch}", + "tags_url": "https://api.github.com/repos/github/linguist/tags", + "blobs_url": "https://api.github.com/repos/github/linguist/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/github/linguist/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/github/linguist/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/github/linguist/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/github/linguist/statuses/{sha}", + "languages_url": "https://api.github.com/repos/github/linguist/languages", + "stargazers_url": "https://api.github.com/repos/github/linguist/stargazers", + "contributors_url": "https://api.github.com/repos/github/linguist/contributors", + "subscribers_url": "https://api.github.com/repos/github/linguist/subscribers", + "subscription_url": "https://api.github.com/repos/github/linguist/subscription", + "commits_url": "https://api.github.com/repos/github/linguist/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/github/linguist/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/github/linguist/comments{/number}", + 
"issue_comment_url": "https://api.github.com/repos/github/linguist/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/github/linguist/contents/{+path}", + "compare_url": "https://api.github.com/repos/github/linguist/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/github/linguist/merges", + "archive_url": "https://api.github.com/repos/github/linguist/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/github/linguist/downloads", + "issues_url": "https://api.github.com/repos/github/linguist/issues{/number}", + "pulls_url": "https://api.github.com/repos/github/linguist/pulls{/number}", + "milestones_url": "https://api.github.com/repos/github/linguist/milestones{/number}", + "notifications_url": "https://api.github.com/repos/github/linguist/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/github/linguist/labels{/name}", + "releases_url": "https://api.github.com/repos/github/linguist/releases{/id}", + "deployments_url": "https://api.github.com/repos/github/linguist/deployments", + "created_at": "2011-05-09T22:53:13Z", + "updated_at": "2022-04-26T08:44:55Z", + "pushed_at": "2022-04-26T10:27:23Z", + "git_url": "git://github.com/github/linguist.git", + "ssh_url": "git@github.com:github/linguist.git", + "clone_url": "https://github.com/github/linguist.git", + "svn_url": "https://github.com/github/linguist", + "homepage": "", + "size": 38000, + "stargazers_count": 9751, + "watchers_count": 9751, + "language": "Ruby", + "has_issues": true, + "has_projects": false, + "has_downloads": true, + "has_wiki": false, + "has_pages": false, + "forks_count": 3539, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 107, + "license": { + "key": "mit", + "name": "MIT License", + "spdx_id": "MIT", + "url": "https://api.github.com/licenses/mit", + "node_id": "MDc6TGljZW5zZTEz" + }, + "allow_forking": true, + "is_template": false, + "topics": [ + 
"language-grammars", + "language-statistics", + "linguistic", + "syntax-highlighting" + ], + "visibility": "public", + "forks": 3539, + "open_issues": 107, + "watchers": 9751, + "default_branch": "master" + }, + "source": { + "id": 1725199, + "node_id": "MDEwOlJlcG9zaXRvcnkxNzI1MTk5", + "name": "linguist", + "full_name": "github/linguist", + "private": false, + "owner": { + "login": "github", + "id": 9919, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjk5MTk=", + "avatar_url": "https://avatars.githubusercontent.com/u/9919?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/github", + "html_url": "https://github.com/github", + "followers_url": "https://api.github.com/users/github/followers", + "following_url": "https://api.github.com/users/github/following{/other_user}", + "gists_url": "https://api.github.com/users/github/gists{/gist_id}", + "starred_url": "https://api.github.com/users/github/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/github/subscriptions", + "organizations_url": "https://api.github.com/users/github/orgs", + "repos_url": "https://api.github.com/users/github/repos", + "events_url": "https://api.github.com/users/github/events{/privacy}", + "received_events_url": "https://api.github.com/users/github/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/github/linguist", + "description": "Language Savant. 
If your repository's language is being reported incorrectly, send us a pull request!", + "fork": false, + "url": "https://api.github.com/repos/github/linguist", + "forks_url": "https://api.github.com/repos/github/linguist/forks", + "keys_url": "https://api.github.com/repos/github/linguist/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/github/linguist/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/github/linguist/teams", + "hooks_url": "https://api.github.com/repos/github/linguist/hooks", + "issue_events_url": "https://api.github.com/repos/github/linguist/issues/events{/number}", + "events_url": "https://api.github.com/repos/github/linguist/events", + "assignees_url": "https://api.github.com/repos/github/linguist/assignees{/user}", + "branches_url": "https://api.github.com/repos/github/linguist/branches{/branch}", + "tags_url": "https://api.github.com/repos/github/linguist/tags", + "blobs_url": "https://api.github.com/repos/github/linguist/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/github/linguist/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/github/linguist/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/github/linguist/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/github/linguist/statuses/{sha}", + "languages_url": "https://api.github.com/repos/github/linguist/languages", + "stargazers_url": "https://api.github.com/repos/github/linguist/stargazers", + "contributors_url": "https://api.github.com/repos/github/linguist/contributors", + "subscribers_url": "https://api.github.com/repos/github/linguist/subscribers", + "subscription_url": "https://api.github.com/repos/github/linguist/subscription", + "commits_url": "https://api.github.com/repos/github/linguist/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/github/linguist/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/github/linguist/comments{/number}", + 
"issue_comment_url": "https://api.github.com/repos/github/linguist/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/github/linguist/contents/{+path}", + "compare_url": "https://api.github.com/repos/github/linguist/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/github/linguist/merges", + "archive_url": "https://api.github.com/repos/github/linguist/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/github/linguist/downloads", + "issues_url": "https://api.github.com/repos/github/linguist/issues{/number}", + "pulls_url": "https://api.github.com/repos/github/linguist/pulls{/number}", + "milestones_url": "https://api.github.com/repos/github/linguist/milestones{/number}", + "notifications_url": "https://api.github.com/repos/github/linguist/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/github/linguist/labels{/name}", + "releases_url": "https://api.github.com/repos/github/linguist/releases{/id}", + "deployments_url": "https://api.github.com/repos/github/linguist/deployments", + "created_at": "2011-05-09T22:53:13Z", + "updated_at": "2022-04-26T08:44:55Z", + "pushed_at": "2022-04-26T10:27:23Z", + "git_url": "git://github.com/github/linguist.git", + "ssh_url": "git@github.com:github/linguist.git", + "clone_url": "https://github.com/github/linguist.git", + "svn_url": "https://github.com/github/linguist", + "homepage": "", + "size": 38000, + "stargazers_count": 9751, + "watchers_count": 9751, + "language": "Ruby", + "has_issues": true, + "has_projects": false, + "has_downloads": true, + "has_wiki": false, + "has_pages": false, + "forks_count": 3539, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 107, + "license": { + "key": "mit", + "name": "MIT License", + "spdx_id": "MIT", + "url": "https://api.github.com/licenses/mit", + "node_id": "MDc6TGljZW5zZTEz" + }, + "allow_forking": true, + "is_template": false, + "topics": [ + 
"language-grammars", + "language-statistics", + "linguistic", + "syntax-highlighting" + ], + "visibility": "public", + "forks": 3539, + "open_issues": 107, + "watchers": 9751, + "default_branch": "master" + }, + "network_count": 3539, + "subscribers_count": 29 +} diff --git a/swh/loader/metadata/tests/test_base.py b/swh/loader/metadata/tests/test_base.py index 664b3af..e947e8a 100644 --- a/swh/loader/metadata/tests/test_base.py +++ b/swh/loader/metadata/tests/test_base.py @@ -1,108 +1,111 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import pytest from swh.loader.core.loader import BaseLoader from swh.loader.metadata.base import BaseMetadataFetcher from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, Origin, RawExtrinsicMetadata, ) import swh.storage.exc ORIGIN = Origin(url="some-url") METADATA_AUTHORITY = MetadataAuthority( type=MetadataAuthorityType.FORGE, url="http://example.org/" ) REMD = RawExtrinsicMetadata( target=ORIGIN.swhid(), discovery_date=datetime.datetime.now(tz=datetime.timezone.utc), authority=METADATA_AUTHORITY, fetcher=MetadataFetcher( name="test fetcher", version="0.0.1", ), format="test-format", metadata=b'{"foo": "bar"}', ) class DummyLoader(BaseLoader): """Base Loader to overload and simplify the base class (technical: to avoid repetition in other *Loader classes)""" visit_type = "git" def __init__(self, storage, *args, **kwargs): super().__init__(storage, *args, **kwargs) def cleanup(self): pass def prepare(self, *args, **kwargs): pass def fetch_data(self): pass def get_snapshot_id(self): return None def store_data(self): pass class DummyMetadataFetcher(BaseMetadataFetcher): SUPPORTED_LISTERS = {"fake-lister"} def __init__(self, origin, credentials, lister_name, lister_instance_name): pass def 
get_origin_metadata(self): return [REMD] + def get_parent_origins(self): + return [] + def test_load(swh_storage, mocker): mocker.patch( "swh.loader.core.metadata_fetchers._fetchers", return_value=[DummyMetadataFetcher], ) loader = DummyLoader( storage=swh_storage, origin_url=ORIGIN.url, lister_name="fake-lister", lister_instance_name="", ) loader.load() assert swh_storage.raw_extrinsic_metadata_get( ORIGIN.swhid(), METADATA_AUTHORITY ).results == [REMD] def test_load_unknown_lister(swh_storage, mocker): mocker.patch( "swh.loader.core.metadata_fetchers._fetchers", return_value=[DummyMetadataFetcher], ) loader = DummyLoader( storage=swh_storage, origin_url=ORIGIN.url, lister_name="other-lister", lister_instance_name="", ) loader.load() with pytest.raises(swh.storage.exc.StorageArgumentException): swh_storage.raw_extrinsic_metadata_get(ORIGIN.swhid(), METADATA_AUTHORITY) diff --git a/swh/loader/metadata/tests/test_github.py b/swh/loader/metadata/tests/test_github.py index 4063078..48d8832 100644 --- a/swh/loader/metadata/tests/test_github.py +++ b/swh/loader/metadata/tests/test_github.py @@ -1,95 +1,128 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime from pathlib import Path from typing import Type import pkg_resources from swh.loader.core.metadata_fetchers import MetadataFetcherProtocol from swh.loader.metadata import __version__ from swh.loader.metadata.github import GitHubMetadataFetcher from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, Origin, RawExtrinsicMetadata, ) from .test_base import DummyLoader ORIGIN = Origin("https://github.com/octocat/Hello-World") +FORKED_ORIGIN = Origin("https://github.com/octocat/linguist") +DOUBLE_FORKED_ORIGIN = Origin("https://github.com/jmarlena/linguist") METADATA_AUTHORITY = 
MetadataAuthority( type=MetadataAuthorityType.FORGE, url="https://github.com" ) def expected_metadata(dt, datadir): data_file_path = Path(datadir) / "https_api.github.com/repos_octocat_Hello-World" with data_file_path.open("rb") as fd: expected_metadata_bytes = fd.read() return RawExtrinsicMetadata( target=ORIGIN.swhid(), discovery_date=dt, authority=METADATA_AUTHORITY, fetcher=MetadataFetcher(name="swh.loader.metadata.github", version=__version__), format="application/vnd.github.v3+json", metadata=expected_metadata_bytes, ) def test_type() -> None: # check with mypy fetcher_cls: Type[MetadataFetcherProtocol] fetcher_cls = GitHubMetadataFetcher print(fetcher_cls) # check at runtime fetcher = GitHubMetadataFetcher( ORIGIN, credentials=None, lister_name="github", lister_instance_name="", ) assert isinstance(fetcher, MetadataFetcherProtocol) def test_github_metadata(datadir, requests_mock_datadir, mocker): now = datetime.datetime.now(tz=datetime.timezone.utc) mocker.patch("swh.loader.metadata.base.now", return_value=now) fetcher = GitHubMetadataFetcher( ORIGIN, credentials=None, lister_name="github", lister_instance_name="" ) assert fetcher.get_origin_metadata() == [expected_metadata(now, datadir)] + assert fetcher.get_parent_origins() == [] + + +def test_github_metadata_fork(datadir, requests_mock_datadir, mocker): + now = datetime.datetime.now(tz=datetime.timezone.utc) + mocker.patch("swh.loader.metadata.base.now", return_value=now) + + fetcher = GitHubMetadataFetcher( + FORKED_ORIGIN, credentials=None, lister_name="github", lister_instance_name="" + ) + + assert fetcher.get_parent_origins() == [ + Origin(url="https://github.com/github/linguist"), + ] + + +def test_github_metadata_fork_of_fork(datadir, requests_mock_datadir, mocker): + now = datetime.datetime.now(tz=datetime.timezone.utc) + mocker.patch("swh.loader.metadata.base.now", return_value=now) + + fetcher = GitHubMetadataFetcher( + DOUBLE_FORKED_ORIGIN, + credentials=None, + lister_name="github", + 
lister_instance_name="", + ) + + assert fetcher.get_parent_origins() == [ + Origin(url="https://github.com/octocat/linguist"), + Origin(url="https://github.com/github/linguist"), + ] def test_github_metadata_from_loader( swh_storage, mocker, datadir, requests_mock_datadir ): # Fail early if this package is not fully installed assert "github" in { entry_point.name for entry_point in pkg_resources.iter_entry_points("swh.loader.metadata") } now = datetime.datetime.now(tz=datetime.timezone.utc) mocker.patch("swh.loader.metadata.base.now", return_value=now) loader = DummyLoader( storage=swh_storage, origin_url=ORIGIN.url, lister_name="github", lister_instance_name="", ) loader.load() assert swh_storage.raw_extrinsic_metadata_get( ORIGIN.swhid(), METADATA_AUTHORITY ).results == [expected_metadata(now, datadir)]