Page MenuHomeSoftware Heritage

D7633.diff
No OneTemporary

D7633.diff

diff --git a/swh/foo/bar.py b/conftest.py
rename from swh/foo/bar.py
rename to conftest.py
--- a/swh/foo/bar.py
+++ b/conftest.py
@@ -1,4 +1,9 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+
+pytest_plugins = [
+ "swh.storage.pytest_plugin",
+ "swh.loader.pytest_plugin",
+]
diff --git a/docs/README.rst b/docs/README.rst
--- a/docs/README.rst
+++ b/docs/README.rst
@@ -1,4 +1,5 @@
-Software Heritage - Python module template
-==========================================
+Software Heritage - Metadata Fetchers
+=====================================
-Python module template, used as skeleton to create new modules.
+This package hooks into the loaders, and loads extrinsic metadata at the same time
+as code artifacts are retrieved.
diff --git a/docs/index.rst b/docs/index.rst
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,4 +1,4 @@
-.. _swh-py-template:
+.. _swh-loader-metadata:
.. include:: README.rst
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -5,6 +5,9 @@
# 3rd party libraries without stubs (yet)
+[mypy-iso8601.*]
+ignore_missing_imports = True
+
[mypy-pkg_resources.*]
ignore_missing_imports = True
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,2 +1,4 @@
-# Add here internal Software Heritage dependencies, one per line.
swh.core[http] >= 0.3 # [http] is required by swh.core.pytest_plugin
+swh.lister >= 2.9.0
+swh.loader.core >= 3.1.0
+swh.storage >= 0.29.0
diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1 +1,7 @@
pytest
+pytest-mock
+requests_mock
+types-requests
+types-PyYAML
+
+swh-storage[testing]
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -36,18 +36,15 @@
return requirements
-# Edit this part to match your module, replace foo by its name
-# Full sample:
-# https://forge.softwareheritage.org/diffusion/DCORE/browse/master/setup.py
setup(
- name="swh.foo", # example: swh.loader.pypi
- description="Software Heritage <Module's intent>",
+ name="swh.loader.metadata",
+ description="Software Heritage Extrinsic Metadata Fetchers",
long_description=long_description,
long_description_content_type="text/x-rst",
python_requires=">=3.7",
author="Software Heritage developers",
author_email="swh-devel@inria.fr",
- url="https://forge.softwareheritage.org/diffusion/<module-git-code>",
+ url="https://forge.softwareheritage.org/diffusion/swh-loader-metadata",
packages=find_packages(), # packages's modules
install_requires=parse_requirements(None, "swh"),
tests_require=parse_requirements("test"),
@@ -56,8 +53,8 @@
extras_require={"testing": parse_requirements("test")},
include_package_data=True,
entry_points="""
- [swh.cli.subcommands]
- foo=swh.foo.cli
+ [swh.loader.metadata]
+ github=swh.loader.metadata.github:GitHubMetadataFetcher
""",
classifiers=[
"Programming Language :: Python :: 3",
@@ -69,7 +66,7 @@
project_urls={
"Bug Reports": "https://forge.softwareheritage.org/maniphest",
"Funding": "https://www.softwareheritage.org/donate",
- "Source": "https://forge.softwareheritage.org/source/swh-<module>",
- "Documentation": "https://docs.softwareheritage.org/devel/swh-<module>/",
+ "Source": "https://forge.softwareheritage.org/source/swh-loader-metadata",
+ "Documentation": "https://docs.softwareheritage.org/devel/swh-loader-metadata/",
},
)
diff --git a/swh/foo/cli.py b/swh/foo/cli.py
deleted file mode 100644
--- a/swh/foo/cli.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import click
-
-from swh.core.cli import CONTEXT_SETTINGS
-from swh.core.cli import swh as swh_cli_group
-
-
-@swh_cli_group.group(name="foo", context_settings=CONTEXT_SETTINGS)
-@click.pass_context
-def foo_cli_group(ctx):
- """Foo main command."""
-
-
-@foo_cli_group.command()
-@click.option("--bar", help="Something")
-@click.pass_context
-def bar(ctx, bar):
- """Do something."""
- click.echo("bar")
diff --git a/swh/foo/tests/__init__.py b/swh/foo/tests/__init__.py
deleted file mode 100644
diff --git a/swh/foo/tests/test_nothing.py b/swh/foo/tests/test_nothing.py
deleted file mode 100644
--- a/swh/foo/tests/test_nothing.py
+++ /dev/null
@@ -1,3 +0,0 @@
-def test_nothing():
- # Placeholder; remove this when we add actual tests
- pass
diff --git a/swh/loader/__init__.py b/swh/loader/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/__init__.py
@@ -0,0 +1,3 @@
+from pkgutil import extend_path
+
+__path__ = extend_path(__path__, __name__)
diff --git a/swh/loader/metadata/__init__.py b/swh/loader/metadata/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/metadata/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (C) 2019-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pkg_resources
+
+try:
+ __version__ = pkg_resources.get_distribution("swh.loader.metadata").version
+except pkg_resources.DistributionNotFound:
+ __version__ = "devel"
+
+
+USER_AGENT_TEMPLATE = "Software Heritage Metadata Loader (%s)"
+USER_AGENT = USER_AGENT_TEMPLATE % __version__
diff --git a/swh/foo/bar.py b/swh/loader/metadata/bar.py
rename from swh/foo/bar.py
rename to swh/loader/metadata/bar.py
diff --git a/swh/loader/metadata/base.py b/swh/loader/metadata/base.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/metadata/base.py
@@ -0,0 +1,184 @@
+# Copyright (C) 2020-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+"""Base module for all metadata fetchers, which are called by the Git loader
+to get metadata from forges on origins being loaded."""
+
+import datetime
+import sys
+from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Type
+import urllib.parse
+
+import requests
+
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ Origin,
+ RawExtrinsicMetadata,
+)
+
+CredentialsType = Optional[Dict[str, Dict[str, List[Dict[str, str]]]]]
+
+
+class InvalidOrigin(Exception):
+ pass
+
+
+def now() -> datetime.datetime:
+ # Used by tests for mocking
+ return datetime.datetime.now(tz=datetime.timezone.utc)
+
+
+class BaseMetadataFetcher:
+ """The base class for Software Heritage metadata fetchers.
+
+ Fetchers are hooks used by loader to retrieve extrinsic metadata from
+ forges before archiving repositories.
+
+ Each fetcher handles a specific type of forge (not VCS); each fetcher
+ class generally matches a lister class, as they use the same APIs.
+
+ Args:
+ origin: the origin to retrieve metadata from
+ credentials: This is the same format as for :class:`swh.lister.pattern.Lister`:
+ dictionary of credentials for all fetchers. The first level
+ identifies the fetcher's name, the second level the lister
+ instance. The final level is a list of dicts containing the
+ expected credentials for the given instance of that fetcher.
+ lister_name, lister_instance_name: first- and second-level keys in ``credentials``
+ """
+
+ FETCHER_NAME: str
+ """The config-friendly name of this fetcher, used to retrieve the first
+ level of credentials."""
+
+ SUPPORTED_LISTERS: Set[str]
+ """Set of forge types this metadata fetcher supports. The type names are the same
+ as the names used by listers themselves.
+
+ Generally, fetchers have a one-to-one matching with listers, in which case
+ this is the set ``{FETCHER_NAME}``.
+ """
+
+ def __init__(
+ self,
+ origin: Origin,
+ credentials: CredentialsType,
+ lister_name: str,
+ lister_instance_name: str,
+ ):
+ if getattr(self, "FETCHER_NAME", None) is None:  # unset on the base class
+ raise NotImplementedError(f"{self.__class__.__name__}.FETCHER_NAME")
+ self.origin = origin
+ self._check_origin()
+ self._origin_metadata_objects: Optional[List[RawExtrinsicMetadata]] = None
+ self._session: Optional[requests.Session] = None
+
+ # Both names do not *have* to match, but they all do for now.
+ assert lister_name == self.FETCHER_NAME
+
+ self.credentials = list(
+ (credentials or {}).get(lister_name, {}).get(lister_instance_name, [])
+ )
+
+ def _make_session(self) -> requests.Session:
+ session = requests.Session()
+ fetcher = self._metadata_fetcher()
+ user_agent = (
+ f"Software Heritage Metadata Fetcher ({fetcher.name} {fetcher.version})"
+ )
+ session.headers["User-Agent"] = user_agent
+ return session
+
+ def session(self) -> requests.Session:
+ if self._session is None:
+ self._session = self._make_session()
+ return self._session
+
+ def _check_origin(self) -> None:
+ """Raise :exc:`InvalidOrigin` if the origin does not belong to the supported
+ forge types of this fetcher."""
+ raise NotImplementedError(f"{self.__class__.__name__}._check_origin")
+
+ def _get_origin_metadata_bytes(self) -> List[Tuple[str, bytes]]:
+ """Return pairs of ``(format, metadata)``, used to build
+ :class:`swh.model.model.RawExtrinsicMetadata` objects."""
+ raise NotImplementedError(
+ f"{self.__class__.__name__}._get_origin_metadata_bytes"
+ )
+
+ def _metadata_authority(self) -> MetadataAuthority:
+ """Return information about the metadata authority that issued metadata
+ we extract from the given origin"""
+ (scheme, netloc, *_) = urllib.parse.urlsplit(self.origin.url)
+
+ assert scheme and netloc, self.origin.url
+
+ # A good default that should work for most, if not all, forges
+ forge_url = urllib.parse.urlunsplit(("https", netloc, "", "", ""))
+ return MetadataAuthority(
+ url=forge_url,
+ type=MetadataAuthorityType.FORGE,
+ )
+
+ @classmethod
+ def _get_package_version(cls) -> str:
+ """Return the version of the package providing this fetcher."""
+ module_name = cls.__module__ or ""
+ module_name_parts = module_name.split(".")
+
+ # Iterate rootward through the package hierarchy until we find a parent of this
+ # fetcher's module with a __version__ attribute.
+ for prefix_size in range(len(module_name_parts), 0, -1):
+ package_name = ".".join(module_name_parts[0:prefix_size])
+ module = sys.modules[package_name]
+ if hasattr(module, "__version__"):
+ return module.__version__
+
+ # If this fetcher's class has no parent package with a __version__,
+ # it should implement it itself.
+ raise NotImplementedError(
+ f"Could not dynamically find the version of {module_name}."
+ )
+
+ @classmethod
+ def _metadata_fetcher(cls) -> MetadataFetcher:
+ """Return information about this metadata fetcher"""
+ return MetadataFetcher(
+ name=cls.__module__,
+ version=cls._get_package_version(),
+ )
+
+ def get_origin_metadata(self) -> List[RawExtrinsicMetadata]:
+ """Return a list of metadata objects for the given origin."""
+ if self._origin_metadata_objects is None:
+ self._origin_metadata_objects = []
+ for (format_, metadata_bytes) in self._get_origin_metadata_bytes():
+ self._origin_metadata_objects.append(
+ RawExtrinsicMetadata(
+ target=self.origin.swhid(),
+ discovery_date=now(),
+ authority=self._metadata_authority(),
+ fetcher=self._metadata_fetcher(),
+ format=format_,
+ metadata=metadata_bytes,
+ )
+ )
+
+ return self._origin_metadata_objects
+
+
+if TYPE_CHECKING:
+ # Makes mypy check BaseMetadataFetcher follows the MetadataFetcherProtocol
+ def _f() -> None:
+ from swh.loader.core.metadata_fetchers import MetadataFetcherProtocol
+
+ base_metadata_fetcher: Type[MetadataFetcherProtocol]
+ base_metadata_fetcher = BaseMetadataFetcher
+ print(base_metadata_fetcher)
+
+ del _f
diff --git a/swh/loader/metadata/github.py b/swh/loader/metadata/github.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/metadata/github.py
@@ -0,0 +1,69 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+"""Metadata fetcher for GitHub."""
+
+import re
+from typing import List, Optional, Tuple
+import urllib.parse
+
+from swh.lister.github.utils import GitHubSession
+
+from . import USER_AGENT
+from .base import BaseMetadataFetcher, InvalidOrigin
+
+HTTP_ACCEPT = "application/vnd.github.v3+json"
+"""HTTP header sent on all API requests to GitHub."""
+
+# The format is defined by a well-understood MIME type; we might as well use that.
+METADATA_FORMAT = HTTP_ACCEPT
+"""Value of the ``format`` field of produced
+:class:`swh.model.model.RawExtrinsicMetadata` objects."""
+
+_API_URL = "https://api.github.com/repos{path}"
+
+
+class GitHubMetadataFetcher(BaseMetadataFetcher):
+ FETCHER_NAME = "github"
+ SUPPORTED_LISTERS = {"github"}
+
+ _github_session: Optional[GitHubSession] = None
+
+ def github_session(self) -> GitHubSession:
+ if self._github_session is None:
+ self._github_session = GitHubSession(
+ user_agent=USER_AGENT, credentials=self.credentials
+ )
+ return self._github_session
+
+ def _check_origin(self):
+ (scheme, netloc, path, query, fragment) = urllib.parse.urlsplit(self.origin.url)
+ if netloc != "github.com":
+ # TODO: relax this check when we support self-hosted GitHub instances
+ raise InvalidOrigin(f"netloc should be 'github.com', not '{netloc}'")
+
+ if scheme != "https" or not re.fullmatch(r"/[^\s/]+/[^\s/]+/?", path):
+ raise InvalidOrigin(f"Unsupported github.com URL: {self.origin.url}")
+
+ if query != "" or fragment != "":
+ raise InvalidOrigin(
+ f"Unexpected end query or fragment in github.com URL: {self.origin.url}"
+ )
+
+ def _get_origin_metadata_bytes(self) -> List[Tuple[str, bytes]]:
+ path = urllib.parse.urlsplit(self.origin.url).path
+ response = self.github_session().request(_API_URL.format(path=path))
+ if response.status_code != 200:
+ # TODO: retry
+ return []
+
+ metadata_bytes = response.content
+
+ # TODO?: strip API hyperlinks from metadata_bytes to save space?
+ # They take 10KB for every repo, or 1KB when compressed by the database server.
+ # This means processing metadata_bytes and changing the format, instead of
+ # archiving verbatim, though.
+
+ return [(METADATA_FORMAT, metadata_bytes)]
diff --git a/swh/foo/py.typed b/swh/loader/metadata/py.typed
rename from swh/foo/py.typed
rename to swh/loader/metadata/py.typed
diff --git a/swh/foo/__init__.py b/swh/loader/metadata/tests/__init__.py
rename from swh/foo/__init__.py
rename to swh/loader/metadata/tests/__init__.py
diff --git a/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_Hello-World b/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_Hello-World
new file mode 100644
--- /dev/null
+++ b/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_Hello-World
@@ -0,0 +1,103 @@
+{
+ "id": 1296269,
+ "node_id": "MDEwOlJlcG9zaXRvcnkxMjk2MjY5",
+ "name": "Hello-World",
+ "full_name": "octocat/Hello-World",
+ "private": false,
+ "owner": {
+ "login": "octocat",
+ "id": 583231,
+ "node_id": "MDQ6VXNlcjU4MzIzMQ==",
+ "avatar_url": "https://avatars.githubusercontent.com/u/583231?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/octocat",
+ "html_url": "https://github.com/octocat",
+ "followers_url": "https://api.github.com/users/octocat/followers",
+ "following_url": "https://api.github.com/users/octocat/following{/other_user}",
+ "gists_url": "https://api.github.com/users/octocat/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/octocat/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/octocat/subscriptions",
+ "organizations_url": "https://api.github.com/users/octocat/orgs",
+ "repos_url": "https://api.github.com/users/octocat/repos",
+ "events_url": "https://api.github.com/users/octocat/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/octocat/received_events",
+ "type": "User",
+ "site_admin": false
+ },
+ "html_url": "https://github.com/octocat/Hello-World",
+ "description": "My first repository on GitHub!",
+ "fork": false,
+ "url": "https://api.github.com/repos/octocat/Hello-World",
+ "forks_url": "https://api.github.com/repos/octocat/Hello-World/forks",
+ "keys_url": "https://api.github.com/repos/octocat/Hello-World/keys{/key_id}",
+ "collaborators_url": "https://api.github.com/repos/octocat/Hello-World/collaborators{/collaborator}",
+ "teams_url": "https://api.github.com/repos/octocat/Hello-World/teams",
+ "hooks_url": "https://api.github.com/repos/octocat/Hello-World/hooks",
+ "issue_events_url": "https://api.github.com/repos/octocat/Hello-World/issues/events{/number}",
+ "events_url": "https://api.github.com/repos/octocat/Hello-World/events",
+ "assignees_url": "https://api.github.com/repos/octocat/Hello-World/assignees{/user}",
+ "branches_url": "https://api.github.com/repos/octocat/Hello-World/branches{/branch}",
+ "tags_url": "https://api.github.com/repos/octocat/Hello-World/tags",
+ "blobs_url": "https://api.github.com/repos/octocat/Hello-World/git/blobs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/octocat/Hello-World/git/tags{/sha}",
+ "git_refs_url": "https://api.github.com/repos/octocat/Hello-World/git/refs{/sha}",
+ "trees_url": "https://api.github.com/repos/octocat/Hello-World/git/trees{/sha}",
+ "statuses_url": "https://api.github.com/repos/octocat/Hello-World/statuses/{sha}",
+ "languages_url": "https://api.github.com/repos/octocat/Hello-World/languages",
+ "stargazers_url": "https://api.github.com/repos/octocat/Hello-World/stargazers",
+ "contributors_url": "https://api.github.com/repos/octocat/Hello-World/contributors",
+ "subscribers_url": "https://api.github.com/repos/octocat/Hello-World/subscribers",
+ "subscription_url": "https://api.github.com/repos/octocat/Hello-World/subscription",
+ "commits_url": "https://api.github.com/repos/octocat/Hello-World/commits{/sha}",
+ "git_commits_url": "https://api.github.com/repos/octocat/Hello-World/git/commits{/sha}",
+ "comments_url": "https://api.github.com/repos/octocat/Hello-World/comments{/number}",
+ "issue_comment_url": "https://api.github.com/repos/octocat/Hello-World/issues/comments{/number}",
+ "contents_url": "https://api.github.com/repos/octocat/Hello-World/contents/{+path}",
+ "compare_url": "https://api.github.com/repos/octocat/Hello-World/compare/{base}...{head}",
+ "merges_url": "https://api.github.com/repos/octocat/Hello-World/merges",
+ "archive_url": "https://api.github.com/repos/octocat/Hello-World/{archive_format}{/ref}",
+ "downloads_url": "https://api.github.com/repos/octocat/Hello-World/downloads",
+ "issues_url": "https://api.github.com/repos/octocat/Hello-World/issues{/number}",
+ "pulls_url": "https://api.github.com/repos/octocat/Hello-World/pulls{/number}",
+ "milestones_url": "https://api.github.com/repos/octocat/Hello-World/milestones{/number}",
+ "notifications_url": "https://api.github.com/repos/octocat/Hello-World/notifications{?since,all,participating}",
+ "labels_url": "https://api.github.com/repos/octocat/Hello-World/labels{/name}",
+ "releases_url": "https://api.github.com/repos/octocat/Hello-World/releases{/id}",
+ "deployments_url": "https://api.github.com/repos/octocat/Hello-World/deployments",
+ "created_at": "2011-01-26T19:01:12Z",
+ "updated_at": "2022-04-21T05:34:00Z",
+ "pushed_at": "2022-04-15T15:39:28Z",
+ "git_url": "git://github.com/octocat/Hello-World.git",
+ "ssh_url": "git@github.com:octocat/Hello-World.git",
+ "clone_url": "https://github.com/octocat/Hello-World.git",
+ "svn_url": "https://github.com/octocat/Hello-World",
+ "homepage": "",
+ "size": 1,
+ "stargazers_count": 1844,
+ "watchers_count": 1844,
+ "language": null,
+ "has_issues": true,
+ "has_projects": true,
+ "has_downloads": true,
+ "has_wiki": true,
+ "has_pages": false,
+ "forks_count": 1729,
+ "mirror_url": null,
+ "archived": false,
+ "disabled": false,
+ "open_issues_count": 867,
+ "license": null,
+ "allow_forking": true,
+ "is_template": false,
+ "topics": [
+
+ ],
+ "visibility": "public",
+ "forks": 1729,
+ "open_issues": 867,
+ "watchers": 1844,
+ "default_branch": "master",
+ "temp_clone_token": null,
+ "network_count": 1729,
+ "subscribers_count": 1724
+}
diff --git a/swh/loader/metadata/tests/test_base.py b/swh/loader/metadata/tests/test_base.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/metadata/tests/test_base.py
@@ -0,0 +1,108 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import datetime
+
+import pytest
+
+from swh.loader.core.loader import BaseLoader
+from swh.loader.metadata.base import BaseMetadataFetcher
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ Origin,
+ RawExtrinsicMetadata,
+)
+import swh.storage.exc
+
+ORIGIN = Origin(url="some-url")
+
+METADATA_AUTHORITY = MetadataAuthority(
+ type=MetadataAuthorityType.FORGE, url="http://example.org/"
+)
+REMD = RawExtrinsicMetadata(
+ target=ORIGIN.swhid(),
+ discovery_date=datetime.datetime.now(tz=datetime.timezone.utc),
+ authority=METADATA_AUTHORITY,
+ fetcher=MetadataFetcher(
+ name="test fetcher",
+ version="0.0.1",
+ ),
+ format="test-format",
+ metadata=b'{"foo": "bar"}',
+)
+
+
+class DummyLoader(BaseLoader):
+ """Base Loader to overload and simplify the base class (technical: to avoid repetition
+ in other *Loader classes)"""
+
+ visit_type = "git"
+
+ def __init__(self, storage, *args, **kwargs):
+ super().__init__(storage, *args, **kwargs)
+
+ def cleanup(self):
+ pass
+
+ def prepare(self, *args, **kwargs):
+ pass
+
+ def fetch_data(self):
+ pass
+
+ def get_snapshot_id(self):
+ return None
+
+ def store_data(self):
+ pass
+
+
+class DummyMetadataFetcher(BaseMetadataFetcher):
+ SUPPORTED_LISTERS = {"fake-lister"}
+
+ def __init__(self, origin, credentials, lister_name, lister_instance_name):
+ pass
+
+ def get_origin_metadata(self):
+ return [REMD]
+
+
+def test_load(swh_storage, mocker):
+ mocker.patch(
+ "swh.loader.core.metadata_fetchers._fetchers",
+ return_value=[DummyMetadataFetcher],
+ )
+
+ loader = DummyLoader(
+ storage=swh_storage,
+ origin_url=ORIGIN.url,
+ lister_name="fake-lister",
+ lister_instance_name="",
+ )
+ loader.load()
+
+ assert swh_storage.raw_extrinsic_metadata_get(
+ ORIGIN.swhid(), METADATA_AUTHORITY
+ ).results == [REMD]
+
+
+def test_load_unknown_lister(swh_storage, mocker):
+ mocker.patch(
+ "swh.loader.core.metadata_fetchers._fetchers",
+ return_value=[DummyMetadataFetcher],
+ )
+
+ loader = DummyLoader(
+ storage=swh_storage,
+ origin_url=ORIGIN.url,
+ lister_name="other-lister",
+ lister_instance_name="",
+ )
+ loader.load()
+
+ with pytest.raises(swh.storage.exc.StorageArgumentException):
+ swh_storage.raw_extrinsic_metadata_get(ORIGIN.swhid(), METADATA_AUTHORITY)
diff --git a/swh/loader/metadata/tests/test_github.py b/swh/loader/metadata/tests/test_github.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/metadata/tests/test_github.py
@@ -0,0 +1,95 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import datetime
+from pathlib import Path
+from typing import Type
+
+import pkg_resources
+
+from swh.loader.core.metadata_fetchers import MetadataFetcherProtocol
+from swh.loader.metadata import __version__
+from swh.loader.metadata.github import GitHubMetadataFetcher
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ Origin,
+ RawExtrinsicMetadata,
+)
+
+from .test_base import DummyLoader
+
+ORIGIN = Origin("https://github.com/octocat/Hello-World")
+
+METADATA_AUTHORITY = MetadataAuthority(
+ type=MetadataAuthorityType.FORGE, url="https://github.com"
+)
+
+
+def expected_metadata(dt, datadir):
+ data_file_path = Path(datadir) / "https_api.github.com/repos_octocat_Hello-World"
+ with data_file_path.open("rb") as fd:
+ expected_metadata_bytes = fd.read()
+ return RawExtrinsicMetadata(
+ target=ORIGIN.swhid(),
+ discovery_date=dt,
+ authority=METADATA_AUTHORITY,
+ fetcher=MetadataFetcher(name="swh.loader.metadata.github", version=__version__),
+ format="application/vnd.github.v3+json",
+ metadata=expected_metadata_bytes,
+ )
+
+
+def test_type() -> None:
+ # check with mypy
+ fetcher_cls: Type[MetadataFetcherProtocol]
+ fetcher_cls = GitHubMetadataFetcher
+ print(fetcher_cls)
+
+ # check at runtime
+ fetcher = GitHubMetadataFetcher(
+ ORIGIN,
+ credentials=None,
+ lister_name="github",
+ lister_instance_name="",
+ )
+ assert isinstance(fetcher, MetadataFetcherProtocol)
+
+
+def test_github_metadata(datadir, requests_mock_datadir, mocker):
+ now = datetime.datetime.now(tz=datetime.timezone.utc)
+ mocker.patch("swh.loader.metadata.base.now", return_value=now)
+
+ fetcher = GitHubMetadataFetcher(
+ ORIGIN, credentials=None, lister_name="github", lister_instance_name=""
+ )
+
+ assert fetcher.get_origin_metadata() == [expected_metadata(now, datadir)]
+
+
+def test_github_metadata_from_loader(
+ swh_storage, mocker, datadir, requests_mock_datadir
+):
+ # Fail early if this package is not fully installed
+ assert "github" in {
+ entry_point.name
+ for entry_point in pkg_resources.iter_entry_points("swh.loader.metadata")
+ }
+
+ now = datetime.datetime.now(tz=datetime.timezone.utc)
+ mocker.patch("swh.loader.metadata.base.now", return_value=now)
+
+ loader = DummyLoader(
+ storage=swh_storage,
+ origin_url=ORIGIN.url,
+ lister_name="github",
+ lister_instance_name="",
+ )
+ loader.load()
+
+ assert swh_storage.raw_extrinsic_metadata_get(
+ ORIGIN.swhid(), METADATA_AUTHORITY
+ ).results == [expected_metadata(now, datadir)]
diff --git a/tox.ini b/tox.ini
--- a/tox.ini
+++ b/tox.ini
@@ -6,10 +6,11 @@
testing
deps =
pytest-cov
+ swh.storage[testing]
commands =
pytest --doctest-modules \
- {envsitepackagesdir}/swh/foo \
- --cov={envsitepackagesdir}/swh/foo \
+ {envsitepackagesdir}/swh/loader/metadata \
+ --cov={envsitepackagesdir}/swh/loader/metadata \
--cov-branch {posargs}
[testenv:black]
@@ -31,7 +32,7 @@
extras =
testing
deps =
- mypy==0.920
+ mypy==0.942
commands =
mypy swh

File Metadata

Mime Type
text/plain
Expires
Wed, Jul 2, 10:44 AM (2 w, 2 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217044

Event Timeline