Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9312154
D7633.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
27 KB
Subscribers
None
D7633.diff
View Options
diff --git a/swh/foo/bar.py b/conftest.py
rename from swh/foo/bar.py
rename to conftest.py
--- a/swh/foo/bar.py
+++ b/conftest.py
@@ -1,4 +1,9 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+
+pytest_plugins = [
+ "swh.storage.pytest_plugin",
+ "swh.loader.pytest_plugin",
+]
diff --git a/docs/README.rst b/docs/README.rst
--- a/docs/README.rst
+++ b/docs/README.rst
@@ -1,4 +1,5 @@
-Software Heritage - Python module template
-==========================================
+Software Heritage - Metadata Fetchers
+=====================================
-Python module template, used as skeleton to create new modules.
+This package hooks into the loaders, and loads extrinsic metadata at the same time
+as code artifacts are retrieved.
diff --git a/docs/index.rst b/docs/index.rst
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,4 +1,4 @@
-.. _swh-py-template:
+.. _swh-loader-metadata:
.. include:: README.rst
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -5,6 +5,9 @@
# 3rd party libraries without stubs (yet)
+[mypy-iso8601.*]
+ignore_missing_imports = True
+
[mypy-pkg_resources.*]
ignore_missing_imports = True
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,2 +1,4 @@
-# Add here internal Software Heritage dependencies, one per line.
swh.core[http] >= 0.3 # [http] is required by swh.core.pytest_plugin
+swh.lister >= 2.9.0
+swh.loader.core >= 3.1.0
+swh.storage >= 0.29.0
diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1 +1,7 @@
pytest
+pytest-mock
+requests_mock
+types-requests
+types-PyYAML
+
+swh-storage[testing]
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -36,18 +36,15 @@
return requirements
-# Edit this part to match your module, replace foo by its name
-# Full sample:
-# https://forge.softwareheritage.org/diffusion/DCORE/browse/master/setup.py
setup(
- name="swh.foo", # example: swh.loader.pypi
- description="Software Heritage <Module's intent>",
+ name="swh.loader.metadata",
+ description="Software Heritage Extrinsic Metadata Fetchers",
long_description=long_description,
long_description_content_type="text/x-rst",
python_requires=">=3.7",
author="Software Heritage developers",
author_email="swh-devel@inria.fr",
- url="https://forge.softwareheritage.org/diffusion/<module-git-code>",
+ url="https://forge.softwareheritage.org/diffusion/swh-loader-metadata",
packages=find_packages(), # packages's modules
install_requires=parse_requirements(None, "swh"),
tests_require=parse_requirements("test"),
@@ -56,8 +53,8 @@
extras_require={"testing": parse_requirements("test")},
include_package_data=True,
entry_points="""
- [swh.cli.subcommands]
- foo=swh.foo.cli
+ [swh.loader.metadata]
+ github=swh.loader.metadata.github:GitHubMetadataFetcher
""",
classifiers=[
"Programming Language :: Python :: 3",
@@ -69,7 +66,7 @@
project_urls={
"Bug Reports": "https://forge.softwareheritage.org/maniphest",
"Funding": "https://www.softwareheritage.org/donate",
- "Source": "https://forge.softwareheritage.org/source/swh-<module>",
- "Documentation": "https://docs.softwareheritage.org/devel/swh-<module>/",
+ "Source": "https://forge.softwareheritage.org/source/swh-loader-metadata",
+ "Documentation": "https://docs.softwareheritage.org/devel/swh-loader-metadata/",
},
)
diff --git a/swh/foo/cli.py b/swh/foo/cli.py
deleted file mode 100644
--- a/swh/foo/cli.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import click
-
-from swh.core.cli import CONTEXT_SETTINGS
-from swh.core.cli import swh as swh_cli_group
-
-
-@swh_cli_group.group(name="foo", context_settings=CONTEXT_SETTINGS)
-@click.pass_context
-def foo_cli_group(ctx):
- """Foo main command."""
-
-
-@foo_cli_group.command()
-@click.option("--bar", help="Something")
-@click.pass_context
-def bar(ctx, bar):
- """Do something."""
- click.echo("bar")
diff --git a/swh/foo/tests/__init__.py b/swh/foo/tests/__init__.py
deleted file mode 100644
diff --git a/swh/foo/tests/test_nothing.py b/swh/foo/tests/test_nothing.py
deleted file mode 100644
--- a/swh/foo/tests/test_nothing.py
+++ /dev/null
@@ -1,3 +0,0 @@
-def test_nothing():
- # Placeholder; remove this when we add actual tests
- pass
diff --git a/swh/loader/__init__.py b/swh/loader/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/__init__.py
@@ -0,0 +1,3 @@
+from pkgutil import extend_path
+
+__path__ = extend_path(__path__, __name__)
diff --git a/swh/loader/metadata/__init__.py b/swh/loader/metadata/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/metadata/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (C) 2019-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pkg_resources
+
+try:
+ __version__ = pkg_resources.get_distribution("swh.loader.metadata").version
+except pkg_resources.DistributionNotFound:
+ __version__ = "devel"
+
+
+USER_AGENT_TEMPLATE = "Software Heritage Metadata Loader (%s)"
+USER_AGENT = USER_AGENT_TEMPLATE % __version__
diff --git a/swh/foo/bar.py b/swh/loader/metadata/bar.py
rename from swh/foo/bar.py
rename to swh/loader/metadata/bar.py
diff --git a/swh/loader/metadata/base.py b/swh/loader/metadata/base.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/metadata/base.py
@@ -0,0 +1,184 @@
+# Copyright (C) 2020-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+"""Base module for all metadata fetchers, which are called by the Git loader
+to get metadata from forges on origins being loaded."""
+
+import datetime
+import sys
+from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Type
+import urllib.parse
+
+import requests
+
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ Origin,
+ RawExtrinsicMetadata,
+)
+
+CredentialsType = Optional[Dict[str, Dict[str, List[Dict[str, str]]]]]
+
+
+class InvalidOrigin(Exception):
+ pass
+
+
+def now() -> datetime.datetime:
+ # Used by tests for mocking
+ return datetime.datetime.now(tz=datetime.timezone.utc)
+
+
+class BaseMetadataFetcher:
+ """The base class for Software Heritage metadata fetchers
+
+ Fetchers are hooks used by loaders to retrieve extrinsic metadata from
+ forges before archiving repositories.
+
+ Each fetcher handles a specific type of forge (not VCS); each fetcher
+ class generally matches a lister class, as they use the same APIs.
+
+ Args:
+ origin: the origin to retrieve metadata from
+ credentials: This is the same format as for :class:`swh.lister.pattern.Lister`:
+ dictionary of credentials for all fetchers. The first level
+ identifies the fetcher's name, the second level the lister
+ instance. The final level is a list of dicts containing the
+ expected credentials for the given instance of that fetcher.
+ session: optional HTTP session to use to send HTTP requests
+ """
+
+ FETCHER_NAME: str
+ """The config-friendly name of this fetcher, used to retrieve the first
+ level of credentials."""
+
+ SUPPORTED_LISTERS: Set[str]
+ """Set of forge types this metadata fetcher supports. The type names are the same
+ as the names used by listers themselves.
+
+ Generally, fetchers have a one-to-one matching with listers, in which case
+ this is the set ``{FETCHER_NAME}``.
+ """
+
+ def __init__(
+ self,
+ origin: Origin,
+ credentials: CredentialsType,
+ lister_name: str,
+ lister_instance_name: str,
+ ):
+ if self.FETCHER_NAME is None:
+ raise NotImplementedError(f"{self.__class__.__name__}.FETCHER_NAME")
+ self.origin = origin
+ self._check_origin()
+ self._origin_metadata_objects: Optional[List[RawExtrinsicMetadata]] = None
+ self._session: Optional[requests.Session] = None
+
+ # Both names do not *have* to match, but they all do for now.
+ assert lister_name == self.FETCHER_NAME
+
+ self.credentials = list(
+ (credentials or {}).get(lister_name, {}).get(lister_instance_name, [])
+ )
+
+ def _make_session(self) -> requests.Session:
+ session = requests.Session()
+ fetcher = self._metadata_fetcher()
+ user_agent = (
+ f"Software Heritage Metadata Fetcher ({fetcher.name} {fetcher.version})"
+ )
+ session.headers["User-Agent"] = user_agent
+ return session
+
+ def session(self) -> requests.Session:
+ if self._session is None:
+ self._session = self._make_session()
+ return self._session
+
+ def _check_origin(self) -> bool:
+ """Raise :exc:`InvalidOrigin` if the origin does not belong to the supported
+ forge types of this fetcher."""
+ raise NotImplementedError(f"{self.__class__.__name__}._check_origin")
+
+ def _get_origin_metadata_bytes(self) -> List[Tuple[str, bytes]]:
+ """Return pairs of ``(format, metadata)``, used to build
+ :class:`swh.model.model.RawExtrinsicMetadata` objects."""
+ raise NotImplementedError(
+ f"{self.__class__.__name__}.get_origin_metadata_bytes"
+ )
+
+ def _metadata_authority(self) -> MetadataAuthority:
+ """Return information about the metadata authority that issued metadata
+ we extract from the given origin"""
+ (scheme, netloc, *_) = urllib.parse.urlsplit(self.origin.url)
+
+ assert scheme and netloc, self.origin.url
+
+ # A good default that should work for most, if not all, forges
+ forge_url = urllib.parse.urlunsplit(("https", netloc, "", "", ""))
+ return MetadataAuthority(
+ url=forge_url,
+ type=MetadataAuthorityType.FORGE,
+ )
+
+ @classmethod
+ def _get_package_version(cls) -> str:
+ """Return the version of the current fetcher."""
+ module_name = cls.__module__ or ""
+ module_name_parts = module_name.split(".")
+
+ # Iterate rootward through the package hierarchy until we find a parent of this
+ # fetcher's module with a __version__ attribute.
+ for prefix_size in range(len(module_name_parts), 0, -1):
+ package_name = ".".join(module_name_parts[0:prefix_size])
+ module = sys.modules[package_name]
+ if hasattr(module, "__version__"):
+ return module.__version__
+
+ # If this fetcher's class has no parent package with a __version__,
+ # it should implement it itself.
+ raise NotImplementedError(
+ f"Could not dynamically find the version of {module_name}."
+ )
+
+ @classmethod
+ def _metadata_fetcher(cls) -> MetadataFetcher:
+ """Return information about this metadata fetcher"""
+ return MetadataFetcher(
+ name=cls.__module__,
+ version=cls._get_package_version(),
+ )
+
+ def get_origin_metadata(self) -> List[RawExtrinsicMetadata]:
+ """Return a list of metadata objects for the given origin."""
+ if self._origin_metadata_objects is None:
+ self._origin_metadata_objects = []
+ for (format_, metadata_bytes) in self._get_origin_metadata_bytes():
+ self._origin_metadata_objects.append(
+ RawExtrinsicMetadata(
+ target=self.origin.swhid(),
+ discovery_date=now(),
+ authority=self._metadata_authority(),
+ fetcher=self._metadata_fetcher(),
+ format=format_,
+ metadata=metadata_bytes,
+ )
+ )
+
+ return self._origin_metadata_objects
+
+
+if TYPE_CHECKING:
+ # Makes mypy check BaseMetadataFetcher follows the MetadataFetcherProtocol
+ def _f() -> None:
+ from swh.loader.core.metadata_fetchers import MetadataFetcherProtocol
+
+ base_metadata_fetcher: Type[MetadataFetcherProtocol]
+ base_metadata_fetcher = BaseMetadataFetcher
+ print(base_metadata_fetcher)
+
+ del _f
diff --git a/swh/loader/metadata/github.py b/swh/loader/metadata/github.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/metadata/github.py
@@ -0,0 +1,69 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+"""Metadata fetcher for GitHub."""
+
+import re
+from typing import List, Optional, Tuple
+import urllib.parse
+
+from swh.lister.github.utils import GitHubSession
+
+from . import USER_AGENT
+from .base import BaseMetadataFetcher, InvalidOrigin
+
+HTTP_ACCEPT = "application/vnd.github.v3+json"
+"""HTTP header sent on all API requests to GitHub."""
+
+# The format is defined by a well-understood MIME type; we might as well use that.
+METADATA_FORMAT = HTTP_ACCEPT
+"""Value of the ``format`` field of produced
+:class:`swh.model.model.RawExtrinsicMetadata` objects."""
+
+_API_URL = "https://api.github.com/repos{path}"
+
+
+class GitHubMetadataFetcher(BaseMetadataFetcher):
+ FETCHER_NAME = "github"
+ SUPPORTED_LISTERS = {"github"}
+
+ _github_session: Optional[GitHubSession] = None
+
+ def github_session(self) -> GitHubSession:
+ if self._github_session is None:
+ self._github_session = GitHubSession(
+ user_agent=USER_AGENT, credentials=self.credentials
+ )
+ return self._github_session
+
+ def _check_origin(self):
+ (scheme, netloc, path, query, fragment) = urllib.parse.urlsplit(self.origin.url)
+ if netloc != "github.com":
+ # TODO: relax this check when we support self-hosted GitHub instances
+ raise InvalidOrigin(f"netloc should be 'github.com', not '{netloc}'")
+
+ if scheme != "https" or not re.match(r"/[^\s/]+/[^\s/]+", path):
+ raise InvalidOrigin(f"Unsupported github.com URL: {self.origin.url}")
+
+ if query != "" or fragment != "":
+ raise InvalidOrigin(
+ f"Unexpected end query or fragment in github.com URL: {self.origin.url}"
+ )
+
+ def _get_origin_metadata_bytes(self) -> List[Tuple[str, bytes]]:
+ (scheme, netloc, path, query, fragment) = urllib.parse.urlsplit(self.origin.url)
+ response = self.github_session().request(_API_URL.format(path=path))
+ if response.status_code != 200:
+ # TODO: retry
+ return []
+
+ metadata_bytes = response.content
+
+ # TODO?: strip API hyperlinks from metadata_bytes to save space?
+ # They take 10KB for every repo, or 1KB when compressed by the database server.
+ # This means processing metadata_bytes and changing the format, instead of
+ # archiving verbatim, though.
+
+ return [(METADATA_FORMAT, metadata_bytes)]
diff --git a/swh/foo/py.typed b/swh/loader/metadata/py.typed
rename from swh/foo/py.typed
rename to swh/loader/metadata/py.typed
diff --git a/swh/foo/__init__.py b/swh/loader/metadata/tests/__init__.py
rename from swh/foo/__init__.py
rename to swh/loader/metadata/tests/__init__.py
diff --git a/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_Hello-World b/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_Hello-World
new file mode 100644
--- /dev/null
+++ b/swh/loader/metadata/tests/data/https_api.github.com/repos_octocat_Hello-World
@@ -0,0 +1,103 @@
+{
+ "id": 1296269,
+ "node_id": "MDEwOlJlcG9zaXRvcnkxMjk2MjY5",
+ "name": "Hello-World",
+ "full_name": "octocat/Hello-World",
+ "private": false,
+ "owner": {
+ "login": "octocat",
+ "id": 583231,
+ "node_id": "MDQ6VXNlcjU4MzIzMQ==",
+ "avatar_url": "https://avatars.githubusercontent.com/u/583231?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/octocat",
+ "html_url": "https://github.com/octocat",
+ "followers_url": "https://api.github.com/users/octocat/followers",
+ "following_url": "https://api.github.com/users/octocat/following{/other_user}",
+ "gists_url": "https://api.github.com/users/octocat/gists{/gist_id}",
+ "starred_url": "https://api.github.com/users/octocat/starred{/owner}{/repo}",
+ "subscriptions_url": "https://api.github.com/users/octocat/subscriptions",
+ "organizations_url": "https://api.github.com/users/octocat/orgs",
+ "repos_url": "https://api.github.com/users/octocat/repos",
+ "events_url": "https://api.github.com/users/octocat/events{/privacy}",
+ "received_events_url": "https://api.github.com/users/octocat/received_events",
+ "type": "User",
+ "site_admin": false
+ },
+ "html_url": "https://github.com/octocat/Hello-World",
+ "description": "My first repository on GitHub!",
+ "fork": false,
+ "url": "https://api.github.com/repos/octocat/Hello-World",
+ "forks_url": "https://api.github.com/repos/octocat/Hello-World/forks",
+ "keys_url": "https://api.github.com/repos/octocat/Hello-World/keys{/key_id}",
+ "collaborators_url": "https://api.github.com/repos/octocat/Hello-World/collaborators{/collaborator}",
+ "teams_url": "https://api.github.com/repos/octocat/Hello-World/teams",
+ "hooks_url": "https://api.github.com/repos/octocat/Hello-World/hooks",
+ "issue_events_url": "https://api.github.com/repos/octocat/Hello-World/issues/events{/number}",
+ "events_url": "https://api.github.com/repos/octocat/Hello-World/events",
+ "assignees_url": "https://api.github.com/repos/octocat/Hello-World/assignees{/user}",
+ "branches_url": "https://api.github.com/repos/octocat/Hello-World/branches{/branch}",
+ "tags_url": "https://api.github.com/repos/octocat/Hello-World/tags",
+ "blobs_url": "https://api.github.com/repos/octocat/Hello-World/git/blobs{/sha}",
+ "git_tags_url": "https://api.github.com/repos/octocat/Hello-World/git/tags{/sha}",
+ "git_refs_url": "https://api.github.com/repos/octocat/Hello-World/git/refs{/sha}",
+ "trees_url": "https://api.github.com/repos/octocat/Hello-World/git/trees{/sha}",
+ "statuses_url": "https://api.github.com/repos/octocat/Hello-World/statuses/{sha}",
+ "languages_url": "https://api.github.com/repos/octocat/Hello-World/languages",
+ "stargazers_url": "https://api.github.com/repos/octocat/Hello-World/stargazers",
+ "contributors_url": "https://api.github.com/repos/octocat/Hello-World/contributors",
+ "subscribers_url": "https://api.github.com/repos/octocat/Hello-World/subscribers",
+ "subscription_url": "https://api.github.com/repos/octocat/Hello-World/subscription",
+ "commits_url": "https://api.github.com/repos/octocat/Hello-World/commits{/sha}",
+ "git_commits_url": "https://api.github.com/repos/octocat/Hello-World/git/commits{/sha}",
+ "comments_url": "https://api.github.com/repos/octocat/Hello-World/comments{/number}",
+ "issue_comment_url": "https://api.github.com/repos/octocat/Hello-World/issues/comments{/number}",
+ "contents_url": "https://api.github.com/repos/octocat/Hello-World/contents/{+path}",
+ "compare_url": "https://api.github.com/repos/octocat/Hello-World/compare/{base}...{head}",
+ "merges_url": "https://api.github.com/repos/octocat/Hello-World/merges",
+ "archive_url": "https://api.github.com/repos/octocat/Hello-World/{archive_format}{/ref}",
+ "downloads_url": "https://api.github.com/repos/octocat/Hello-World/downloads",
+ "issues_url": "https://api.github.com/repos/octocat/Hello-World/issues{/number}",
+ "pulls_url": "https://api.github.com/repos/octocat/Hello-World/pulls{/number}",
+ "milestones_url": "https://api.github.com/repos/octocat/Hello-World/milestones{/number}",
+ "notifications_url": "https://api.github.com/repos/octocat/Hello-World/notifications{?since,all,participating}",
+ "labels_url": "https://api.github.com/repos/octocat/Hello-World/labels{/name}",
+ "releases_url": "https://api.github.com/repos/octocat/Hello-World/releases{/id}",
+ "deployments_url": "https://api.github.com/repos/octocat/Hello-World/deployments",
+ "created_at": "2011-01-26T19:01:12Z",
+ "updated_at": "2022-04-21T05:34:00Z",
+ "pushed_at": "2022-04-15T15:39:28Z",
+ "git_url": "git://github.com/octocat/Hello-World.git",
+ "ssh_url": "git@github.com:octocat/Hello-World.git",
+ "clone_url": "https://github.com/octocat/Hello-World.git",
+ "svn_url": "https://github.com/octocat/Hello-World",
+ "homepage": "",
+ "size": 1,
+ "stargazers_count": 1844,
+ "watchers_count": 1844,
+ "language": null,
+ "has_issues": true,
+ "has_projects": true,
+ "has_downloads": true,
+ "has_wiki": true,
+ "has_pages": false,
+ "forks_count": 1729,
+ "mirror_url": null,
+ "archived": false,
+ "disabled": false,
+ "open_issues_count": 867,
+ "license": null,
+ "allow_forking": true,
+ "is_template": false,
+ "topics": [
+
+ ],
+ "visibility": "public",
+ "forks": 1729,
+ "open_issues": 867,
+ "watchers": 1844,
+ "default_branch": "master",
+ "temp_clone_token": null,
+ "network_count": 1729,
+ "subscribers_count": 1724
+}
diff --git a/swh/loader/metadata/tests/test_base.py b/swh/loader/metadata/tests/test_base.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/metadata/tests/test_base.py
@@ -0,0 +1,108 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import datetime
+
+import pytest
+
+from swh.loader.core.loader import BaseLoader
+from swh.loader.metadata.base import BaseMetadataFetcher
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ Origin,
+ RawExtrinsicMetadata,
+)
+import swh.storage.exc
+
+ORIGIN = Origin(url="some-url")
+
+METADATA_AUTHORITY = MetadataAuthority(
+ type=MetadataAuthorityType.FORGE, url="http://example.org/"
+)
+REMD = RawExtrinsicMetadata(
+ target=ORIGIN.swhid(),
+ discovery_date=datetime.datetime.now(tz=datetime.timezone.utc),
+ authority=METADATA_AUTHORITY,
+ fetcher=MetadataFetcher(
+ name="test fetcher",
+ version="0.0.1",
+ ),
+ format="test-format",
+ metadata=b'{"foo": "bar"}',
+)
+
+
+class DummyLoader(BaseLoader):
+ """Base Loader to overload and simplify the base class (technical: to avoid repetition
+ in other *Loader classes)"""
+
+ visit_type = "git"
+
+ def __init__(self, storage, *args, **kwargs):
+ super().__init__(storage, *args, **kwargs)
+
+ def cleanup(self):
+ pass
+
+ def prepare(self, *args, **kwargs):
+ pass
+
+ def fetch_data(self):
+ pass
+
+ def get_snapshot_id(self):
+ return None
+
+ def store_data(self):
+ pass
+
+
+class DummyMetadataFetcher(BaseMetadataFetcher):
+ SUPPORTED_LISTERS = {"fake-lister"}
+
+ def __init__(self, origin, credentials, lister_name, lister_instance_name):
+ pass
+
+ def get_origin_metadata(self):
+ return [REMD]
+
+
+def test_load(swh_storage, mocker):
+ mocker.patch(
+ "swh.loader.core.metadata_fetchers._fetchers",
+ return_value=[DummyMetadataFetcher],
+ )
+
+ loader = DummyLoader(
+ storage=swh_storage,
+ origin_url=ORIGIN.url,
+ lister_name="fake-lister",
+ lister_instance_name="",
+ )
+ loader.load()
+
+ assert swh_storage.raw_extrinsic_metadata_get(
+ ORIGIN.swhid(), METADATA_AUTHORITY
+ ).results == [REMD]
+
+
+def test_load_unknown_lister(swh_storage, mocker):
+ mocker.patch(
+ "swh.loader.core.metadata_fetchers._fetchers",
+ return_value=[DummyMetadataFetcher],
+ )
+
+ loader = DummyLoader(
+ storage=swh_storage,
+ origin_url=ORIGIN.url,
+ lister_name="other-lister",
+ lister_instance_name="",
+ )
+ loader.load()
+
+ with pytest.raises(swh.storage.exc.StorageArgumentException):
+ swh_storage.raw_extrinsic_metadata_get(ORIGIN.swhid(), METADATA_AUTHORITY)
diff --git a/swh/loader/metadata/tests/test_github.py b/swh/loader/metadata/tests/test_github.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/metadata/tests/test_github.py
@@ -0,0 +1,95 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import datetime
+from pathlib import Path
+from typing import Type
+
+import pkg_resources
+
+from swh.loader.core.metadata_fetchers import MetadataFetcherProtocol
+from swh.loader.metadata import __version__
+from swh.loader.metadata.github import GitHubMetadataFetcher
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ Origin,
+ RawExtrinsicMetadata,
+)
+
+from .test_base import DummyLoader
+
+ORIGIN = Origin("https://github.com/octocat/Hello-World")
+
+METADATA_AUTHORITY = MetadataAuthority(
+ type=MetadataAuthorityType.FORGE, url="https://github.com"
+)
+
+
+def expected_metadata(dt, datadir):
+ data_file_path = Path(datadir) / "https_api.github.com/repos_octocat_Hello-World"
+ with data_file_path.open("rb") as fd:
+ expected_metadata_bytes = fd.read()
+ return RawExtrinsicMetadata(
+ target=ORIGIN.swhid(),
+ discovery_date=dt,
+ authority=METADATA_AUTHORITY,
+ fetcher=MetadataFetcher(name="swh.loader.metadata.github", version=__version__),
+ format="application/vnd.github.v3+json",
+ metadata=expected_metadata_bytes,
+ )
+
+
+def test_type() -> None:
+ # check with mypy
+ fetcher_cls: Type[MetadataFetcherProtocol]
+ fetcher_cls = GitHubMetadataFetcher
+ print(fetcher_cls)
+
+ # check at runtime
+ fetcher = GitHubMetadataFetcher(
+ ORIGIN,
+ credentials=None,
+ lister_name="github",
+ lister_instance_name="",
+ )
+ assert isinstance(fetcher, MetadataFetcherProtocol)
+
+
+def test_github_metadata(datadir, requests_mock_datadir, mocker):
+ now = datetime.datetime.now(tz=datetime.timezone.utc)
+ mocker.patch("swh.loader.metadata.base.now", return_value=now)
+
+ fetcher = GitHubMetadataFetcher(
+ ORIGIN, credentials=None, lister_name="github", lister_instance_name=""
+ )
+
+ assert fetcher.get_origin_metadata() == [expected_metadata(now, datadir)]
+
+
+def test_github_metadata_from_loader(
+ swh_storage, mocker, datadir, requests_mock_datadir
+):
+ # Fail early if this package is not fully installed
+ assert "github" in {
+ entry_point.name
+ for entry_point in pkg_resources.iter_entry_points("swh.loader.metadata")
+ }
+
+ now = datetime.datetime.now(tz=datetime.timezone.utc)
+ mocker.patch("swh.loader.metadata.base.now", return_value=now)
+
+ loader = DummyLoader(
+ storage=swh_storage,
+ origin_url=ORIGIN.url,
+ lister_name="github",
+ lister_instance_name="",
+ )
+ loader.load()
+
+ assert swh_storage.raw_extrinsic_metadata_get(
+ ORIGIN.swhid(), METADATA_AUTHORITY
+ ).results == [expected_metadata(now, datadir)]
diff --git a/tox.ini b/tox.ini
--- a/tox.ini
+++ b/tox.ini
@@ -6,10 +6,11 @@
testing
deps =
pytest-cov
+ swh.storage[testing]
commands =
pytest --doctest-modules \
- {envsitepackagesdir}/swh/foo \
- --cov={envsitepackagesdir}/swh/foo \
+ {envsitepackagesdir}/swh/loader/metadata \
+ --cov={envsitepackagesdir}/swh/loader/metadata \
--cov-branch {posargs}
[testenv:black]
@@ -31,7 +32,7 @@
extras =
testing
deps =
- mypy==0.920
+ mypy==0.942
commands =
mypy swh
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Jul 2, 10:44 AM (2 w, 2 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217044
Attached To
D7633: Initialize base metadata fetcher + github
Event Timeline
Log In to Comment