diff --git a/requirements-swh.txt b/requirements-swh.txt index 1678cb4..7c34143 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,2 +1,2 @@ -swh.core[db,github] >= 2.15 +swh.core[db,github] >= 2.16.1 swh.scheduler >= 0.8 diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py index 30d12d9..251c25a 100644 --- a/swh/lister/packagist/lister.py +++ b/swh/lister/packagist/lister.py @@ -1,167 +1,179 @@ # Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import dataclass from datetime import datetime, timezone import logging from typing import Any, Dict, Iterator, List, Optional import iso8601 import requests +from swh.core.github.utils import GitHubSession from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) PackagistPageType = List[str] @dataclass class PackagistListerState: """State of Packagist lister""" last_listing_date: Optional[datetime] = None """Last date when packagist lister was executed""" class PackagistLister(Lister[PackagistListerState, PackagistPageType]): """ List all Packagist projects and send associated origins to scheduler. The lister queries the Packagist API, whose documentation can be found at https://packagist.org/apidoc. For each package, its metadata are retrieved using Packagist API endpoints whose responses are served from static files, which are guaranteed to be efficient on the Packagist side (no dymamic queries). Furthermore, subsequent listing will send the "If-Modified-Since" HTTP header to only retrieve packages metadata updated since the previous listing operation in order to save bandwidth and return only origins which might have new released versions. """ LISTER_NAME = "Packagist" PACKAGIST_PACKAGES_LIST_URL = "https://packagist.org/packages/list.json" PACKAGIST_REPO_BASE_URL = "https://repo.packagist.org/p" def __init__( self, scheduler: SchedulerInterface, credentials: CredentialsType = None, ): super().__init__( scheduler=scheduler, url=self.PACKAGIST_PACKAGES_LIST_URL, instance="packagist", credentials=credentials, ) self.session.headers.update({"Accept": "application/json"}) self.listing_date = datetime.now().astimezone(tz=timezone.utc) + self.github_session = GitHubSession( + credentials=self.credentials, + user_agent=str(self.session.headers["User-Agent"]), + ) def state_from_dict(self, d: Dict[str, Any]) -> PackagistListerState: last_listing_date = d.get("last_listing_date") if last_listing_date is not None: d["last_listing_date"] = iso8601.parse_date(last_listing_date) return PackagistListerState(**d) def state_to_dict(self, state: PackagistListerState) -> Dict[str, Any]: d: Dict[str, Optional[str]] = {"last_listing_date": None} last_listing_date = state.last_listing_date if last_listing_date is not None: d["last_listing_date"] = last_listing_date.isoformat() return d def api_request(self, url: str) -> Any: response = self.http_request(url) # response is empty when status code is 304 return response.json() if response.status_code == 200 else {} def get_pages(self) -> Iterator[PackagistPageType]: """ Yield a single page listing all Packagist projects. """ yield self.api_request(self.PACKAGIST_PACKAGES_LIST_URL)["packageNames"] def get_origins_from_page(self, page: PackagistPageType) -> Iterator[ListedOrigin]: """ Iterate on all Packagist projects and yield ListedOrigin instances. """ assert self.lister_obj.id is not None # save some bandwidth by only getting packages metadata updated since # last listing if self.state.last_listing_date is not None: if_modified_since = self.state.last_listing_date.strftime( "%a, %d %b %Y %H:%M:%S GMT" ) self.session.headers["If-Modified-Since"] = if_modified_since # to ensure origins will not be listed multiple times origin_urls = set() for package_name in page: try: metadata = self.api_request( f"{self.PACKAGIST_REPO_BASE_URL}/{package_name}.json" ) if not metadata.get("packages", {}): # package metadata not updated since last listing continue if package_name not in metadata["packages"]: # missing package metadata in response continue versions_info = metadata["packages"][package_name].values() except requests.HTTPError: # error when getting package metadata (usually 404 when a # package has been removed), skip it and process next package continue origin_url = None visit_type = None last_update = None # extract origin url for package, vcs type and latest release date for version_info in versions_info: origin_url = version_info.get("source", {}).get("url", "") if not origin_url: continue # can be git, hg or svn visit_type = version_info.get("source", {}).get("type", "") dist_time_str = version_info.get("time", "") if not dist_time_str: continue dist_time = iso8601.parse_date(dist_time_str) if last_update is None or dist_time > last_update: last_update = dist_time # skip package with already seen origin url or with missing required info if visit_type is None or origin_url is None or origin_url in origin_urls: continue + if visit_type == "git": + # Non-github urls will be returned as is, github ones will be canonical + # ones + origin_url = ( + self.github_session.get_canonical_url(origin_url) or origin_url + ) + # bitbucket closed its mercurial hosting service, those origins can not be # loaded into the archive anymore if visit_type == "hg" and origin_url.startswith("https://bitbucket.org/"): continue origin_urls.add(origin_url) logger.debug( "Found package %s last updated on %s", package_name, last_update ) yield ListedOrigin( lister_id=self.lister_obj.id, url=origin_url, visit_type=visit_type, last_update=last_update, ) def finalize(self) -> None: self.state.last_listing_date = self.listing_date self.updated = True diff --git a/swh/lister/packagist/tests/data/https_api.github.com/repos_gitlky_wx_article b/swh/lister/packagist/tests/data/https_api.github.com/repos_gitlky_wx_article new file mode 100644 index 0000000..3892be4 --- /dev/null +++ b/swh/lister/packagist/tests/data/https_api.github.com/repos_gitlky_wx_article @@ -0,0 +1 @@ +{"html_url": "https://github.com/gitlky/wx_article"} diff --git a/swh/lister/packagist/tests/data/https_api.github.com/repos_spryker-eco_computop-api b/swh/lister/packagist/tests/data/https_api.github.com/repos_spryker-eco_computop-api new file mode 100644 index 0000000..0e84d32 --- /dev/null +++ b/swh/lister/packagist/tests/data/https_api.github.com/repos_spryker-eco_computop-api @@ -0,0 +1,130 @@ +{ + "id": 133818271, + "node_id": "MDEwOlJlcG9zaXRvcnkxMzM4MTgyNzE=", + "name": "computop-api", + "full_name": "spryker-eco/computop-api", + "private": false, + "owner": { + "login": "spryker-eco", + "id": 25103059, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjI1MTAzMDU5", + "avatar_url": "https://avatars.githubusercontent.com/u/25103059?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/spryker-eco", + "html_url": "https://github.com/spryker-eco", + "followers_url": "https://api.github.com/users/spryker-eco/followers", + "following_url": "https://api.github.com/users/spryker-eco/following{/other_user}", + "gists_url": "https://api.github.com/users/spryker-eco/gists{/gist_id}", + "starred_url": "https://api.github.com/users/spryker-eco/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/spryker-eco/subscriptions", + "organizations_url": "https://api.github.com/users/spryker-eco/orgs", + "repos_url": "https://api.github.com/users/spryker-eco/repos", + "events_url": "https://api.github.com/users/spryker-eco/events{/privacy}", + "received_events_url": "https://api.github.com/users/spryker-eco/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/spryker-eco/computop-api", + "description": "Spryker Eco ComputopApi module", + "fork": false, + "url": "https://api.github.com/repos/spryker-eco/computop-api", + "forks_url": "https://api.github.com/repos/spryker-eco/computop-api/forks", + "keys_url": "https://api.github.com/repos/spryker-eco/computop-api/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/spryker-eco/computop-api/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/spryker-eco/computop-api/teams", + "hooks_url": "https://api.github.com/repos/spryker-eco/computop-api/hooks", + "issue_events_url": "https://api.github.com/repos/spryker-eco/computop-api/issues/events{/number}", + "events_url": "https://api.github.com/repos/spryker-eco/computop-api/events", + "assignees_url": "https://api.github.com/repos/spryker-eco/computop-api/assignees{/user}", + "branches_url": "https://api.github.com/repos/spryker-eco/computop-api/branches{/branch}", + "tags_url": "https://api.github.com/repos/spryker-eco/computop-api/tags", + "blobs_url": "https://api.github.com/repos/spryker-eco/computop-api/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/spryker-eco/computop-api/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/spryker-eco/computop-api/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/spryker-eco/computop-api/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/spryker-eco/computop-api/statuses/{sha}", + "languages_url": "https://api.github.com/repos/spryker-eco/computop-api/languages", + "stargazers_url": "https://api.github.com/repos/spryker-eco/computop-api/stargazers", + "contributors_url": "https://api.github.com/repos/spryker-eco/computop-api/contributors", + "subscribers_url": "https://api.github.com/repos/spryker-eco/computop-api/subscribers", + "subscription_url": "https://api.github.com/repos/spryker-eco/computop-api/subscription", + "commits_url": "https://api.github.com/repos/spryker-eco/computop-api/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/spryker-eco/computop-api/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/spryker-eco/computop-api/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/spryker-eco/computop-api/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/spryker-eco/computop-api/contents/{+path}", + "compare_url": "https://api.github.com/repos/spryker-eco/computop-api/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/spryker-eco/computop-api/merges", + "archive_url": "https://api.github.com/repos/spryker-eco/computop-api/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/spryker-eco/computop-api/downloads", + "issues_url": "https://api.github.com/repos/spryker-eco/computop-api/issues{/number}", + "pulls_url": "https://api.github.com/repos/spryker-eco/computop-api/pulls{/number}", + "milestones_url": "https://api.github.com/repos/spryker-eco/computop-api/milestones{/number}", + "notifications_url": "https://api.github.com/repos/spryker-eco/computop-api/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/spryker-eco/computop-api/labels{/name}", + "releases_url": "https://api.github.com/repos/spryker-eco/computop-api/releases{/id}", + "deployments_url": "https://api.github.com/repos/spryker-eco/computop-api/deployments", + "created_at": "2018-05-17T13:34:07Z", + "updated_at": "2021-12-28T13:55:55Z", + "pushed_at": "2022-03-18T14:05:09Z", + "git_url": "git://github.com/spryker-eco/computop-api.git", + "ssh_url": "git@github.com:spryker-eco/computop-api.git", + "clone_url": "https://github.com/spryker-eco/computop-api.git", + "svn_url": "https://github.com/spryker-eco/computop-api", + "homepage": "https://spryker.com", + "size": 198, + "stargazers_count": 0, + "watchers_count": 0, + "language": "PHP", + "has_issues": true, + "has_projects": false, + "has_downloads": true, + "has_wiki": true, + "has_pages": false, + "forks_count": 0, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 0, + "license": { + "key": "mit", + "name": "MIT License", + "spdx_id": "MIT", + "url": "https://api.github.com/licenses/mit", + "node_id": "MDc6TGljZW5zZTEz" + }, + "allow_forking": true, + "is_template": false, + "web_commit_signoff_required": false, + "topics": [ + + ], + "visibility": "public", + "forks": 0, + "open_issues": 0, + "watchers": 0, + "default_branch": "master", + "temp_clone_token": null, + "organization": { + "login": "spryker-eco", + "id": 25103059, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjI1MTAzMDU5", + "avatar_url": "https://avatars.githubusercontent.com/u/25103059?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/spryker-eco", + "html_url": "https://github.com/spryker-eco", + "followers_url": "https://api.github.com/users/spryker-eco/followers", + "following_url": "https://api.github.com/users/spryker-eco/following{/other_user}", + "gists_url": "https://api.github.com/users/spryker-eco/gists{/gist_id}", + "starred_url": "https://api.github.com/users/spryker-eco/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/spryker-eco/subscriptions", + "organizations_url": "https://api.github.com/users/spryker-eco/orgs", + "repos_url": "https://api.github.com/users/spryker-eco/repos", + "events_url": "https://api.github.com/users/spryker-eco/events{/privacy}", + "received_events_url": "https://api.github.com/users/spryker-eco/received_events", + "type": "Organization", + "site_admin": false + }, + "network_count": 0, + "subscribers_count": 33 +} diff --git a/swh/lister/packagist/tests/data/https_api.github.com/repos_ycms_module-main b/swh/lister/packagist/tests/data/https_api.github.com/repos_ycms_module-main new file mode 100644 index 0000000..e1b4664 --- /dev/null +++ b/swh/lister/packagist/tests/data/https_api.github.com/repos_ycms_module-main @@ -0,0 +1,104 @@ +{ + "id": 38592537, + "node_id": "MDEwOlJlcG9zaXRvcnkzODU5MjUzNw==", + "name": "module-main", + "full_name": "GameCHN/module-main", + "private": false, + "owner": { + "login": "GameCHN", + "id": 13175811, + "node_id": "MDQ6VXNlcjEzMTc1ODEx", + "avatar_url": "https://avatars.githubusercontent.com/u/13175811?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/GameCHN", + "html_url": "https://github.com/GameCHN", + "followers_url": "https://api.github.com/users/GameCHN/followers", + "following_url": "https://api.github.com/users/GameCHN/following{/other_user}", + "gists_url": "https://api.github.com/users/GameCHN/gists{/gist_id}", + "starred_url": "https://api.github.com/users/GameCHN/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/GameCHN/subscriptions", + "organizations_url": "https://api.github.com/users/GameCHN/orgs", + "repos_url": "https://api.github.com/users/GameCHN/repos", + "events_url": "https://api.github.com/users/GameCHN/events{/privacy}", + "received_events_url": "https://api.github.com/users/GameCHN/received_events", + "type": "User", + "site_admin": false + }, + "html_url": "https://github.com/GameCHN/module-main", + "description": null, + "fork": false, + "url": "https://api.github.com/repos/GameCHN/module-main", + "forks_url": "https://api.github.com/repos/GameCHN/module-main/forks", + "keys_url": "https://api.github.com/repos/GameCHN/module-main/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/GameCHN/module-main/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/GameCHN/module-main/teams", + "hooks_url": "https://api.github.com/repos/GameCHN/module-main/hooks", + "issue_events_url": "https://api.github.com/repos/GameCHN/module-main/issues/events{/number}", + "events_url": "https://api.github.com/repos/GameCHN/module-main/events", + "assignees_url": "https://api.github.com/repos/GameCHN/module-main/assignees{/user}", + "branches_url": "https://api.github.com/repos/GameCHN/module-main/branches{/branch}", + "tags_url": "https://api.github.com/repos/GameCHN/module-main/tags", + "blobs_url": "https://api.github.com/repos/GameCHN/module-main/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/GameCHN/module-main/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/GameCHN/module-main/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/GameCHN/module-main/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/GameCHN/module-main/statuses/{sha}", + "languages_url": "https://api.github.com/repos/GameCHN/module-main/languages", + "stargazers_url": "https://api.github.com/repos/GameCHN/module-main/stargazers", + "contributors_url": "https://api.github.com/repos/GameCHN/module-main/contributors", + "subscribers_url": "https://api.github.com/repos/GameCHN/module-main/subscribers", + "subscription_url": "https://api.github.com/repos/GameCHN/module-main/subscription", + "commits_url": "https://api.github.com/repos/GameCHN/module-main/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/GameCHN/module-main/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/GameCHN/module-main/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/GameCHN/module-main/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/GameCHN/module-main/contents/{+path}", + "compare_url": "https://api.github.com/repos/GameCHN/module-main/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/GameCHN/module-main/merges", + "archive_url": "https://api.github.com/repos/GameCHN/module-main/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/GameCHN/module-main/downloads", + "issues_url": "https://api.github.com/repos/GameCHN/module-main/issues{/number}", + "pulls_url": "https://api.github.com/repos/GameCHN/module-main/pulls{/number}", + "milestones_url": "https://api.github.com/repos/GameCHN/module-main/milestones{/number}", + "notifications_url": "https://api.github.com/repos/GameCHN/module-main/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/GameCHN/module-main/labels{/name}", + "releases_url": "https://api.github.com/repos/GameCHN/module-main/releases{/id}", + "deployments_url": "https://api.github.com/repos/GameCHN/module-main/deployments", + "created_at": "2015-07-06T02:08:07Z", + "updated_at": "2015-07-06T02:09:45Z", + "pushed_at": "2015-08-23T04:42:42Z", + "git_url": "git://github.com/GameCHN/module-main.git", + "ssh_url": "git@github.com:GameCHN/module-main.git", + "clone_url": "https://github.com/GameCHN/module-main.git", + "svn_url": "https://github.com/GameCHN/module-main", + "homepage": null, + "size": 172, + "stargazers_count": 0, + "watchers_count": 0, + "language": "PHP", + "has_issues": true, + "has_projects": true, + "has_downloads": true, + "has_wiki": true, + "has_pages": false, + "forks_count": 0, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 0, + "license": null, + "allow_forking": true, + "is_template": false, + "web_commit_signoff_required": false, + "topics": [ + + ], + "visibility": "public", + "forks": 0, + "open_issues": 0, + "watchers": 0, + "default_branch": "master", + "temp_clone_token": null, + "network_count": 0, + "subscribers_count": 2 +} diff --git a/swh/lister/packagist/tests/data/idevlab_essential.json b/swh/lister/packagist/tests/data/idevlab_essential.json new file mode 100644 index 0000000..9891d8c --- /dev/null +++ b/swh/lister/packagist/tests/data/idevlab_essential.json @@ -0,0 +1,309 @@ +{ + "packages": { + "idevlab/essential": { + "1.0.1": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "1.0.1", + "version_normalized": "1.0.1.0", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "6ff62de2e789aae308f3ff6fb11ea5955c806e19" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=6ff62de2e789aae308f3ff6fb11ea5955c806e19", + "type": "zip", + "shasum": "", + "reference": "6ff62de2e789aae308f3ff6fb11ea5955c806e19" + }, + "type": "library", + "time": "2022-08-17T21:57:20+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "require": { + "php": "^8.1" + }, + "uid": 6622550 + }, + "1.1.0": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "1.1.0", + "version_normalized": "1.1.0.0", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "e69bdd42f03d7c453be072eef0c62f7cfeae2af8" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=e69bdd42f03d7c453be072eef0c62f7cfeae2af8", + "type": "zip", + "shasum": "", + "reference": "e69bdd42f03d7c453be072eef0c62f7cfeae2af8" + }, + "type": "library", + "time": "2022-08-17T22:11:20+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "require": { + "php": "^8.1", + "ext-mbstring": "*" + }, + "uid": 6622551 + }, + "1.1.1": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "1.1.1", + "version_normalized": "1.1.1.0", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "c6f5113ffad27402a9cafe593efd518517bc321c" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=c6f5113ffad27402a9cafe593efd518517bc321c", + "type": "zip", + "shasum": "", + "reference": "c6f5113ffad27402a9cafe593efd518517bc321c" + }, + "type": "library", + "time": "2022-08-17T22:31:14+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "require": { + "php": "^8.1", + "ext-mbstring": "*" + }, + "uid": 6622552 + }, + "1.2.0": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "1.2.0", + "version_normalized": "1.2.0.0", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "263fd95fcadfbfd4af6108749be0699d69d4df90" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=263fd95fcadfbfd4af6108749be0699d69d4df90", + "type": "zip", + "shasum": "", + "reference": "263fd95fcadfbfd4af6108749be0699d69d4df90" + }, + "type": "library", + "time": "2022-10-12T10:34:29+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "require": { + "php": "^8.1", + "ext-mbstring": "*" + }, + "uid": 6624846 + }, + "dev-develop": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "dev-develop", + "version_normalized": "dev-develop", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "8125bcc747e1bf5086a3195f74a682b7be0aea6a" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=8125bcc747e1bf5086a3195f74a682b7be0aea6a", + "type": "zip", + "shasum": "", + "reference": "8125bcc747e1bf5086a3195f74a682b7be0aea6a" + }, + "type": "library", + "time": "2022-10-12T10:34:29+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "require": { + "php": "^8.1", + "ext-mbstring": "*" + }, + "uid": 6622554 + }, + "dev-main": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "dev-main", + "version_normalized": "dev-main", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "263fd95fcadfbfd4af6108749be0699d69d4df90" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=263fd95fcadfbfd4af6108749be0699d69d4df90", + "type": "zip", + "shasum": "", + "reference": "263fd95fcadfbfd4af6108749be0699d69d4df90" + }, + "type": "library", + "time": "2022-10-12T10:34:29+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "default-branch": true, + "require": { + "php": "^8.1", + "ext-mbstring": "*" + }, + "uid": 6622553 + }, + "dev-release/1.2.0": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "dev-release/1.2.0", + "version_normalized": "dev-release/1.2.0", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "1688c6cb5f3b0e9f21a5bbcb9f9951138ab51a66" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=1688c6cb5f3b0e9f21a5bbcb9f9951138ab51a66", + "type": "zip", + "shasum": "", + "reference": "1688c6cb5f3b0e9f21a5bbcb9f9951138ab51a66" + }, + "type": "library", + "time": "2022-10-12T10:34:14+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "require": { + "php": "^8.1", + "ext-mbstring": "*" + }, + "uid": 6624845 + }, + "v1.0.0": { + "name": "idevlab/essential", + "description": "All the methods and tools useful for the development of the various idevlab projects.", + "keywords": [], + "homepage": "", + "version": "v1.0.0", + "version_normalized": "1.0.0.0", + "license": [], + "authors": [ + { + "name": "Florian Sinama", + "email": "f.sinama@gmail.com" + } + ], + "source": { + "url": "git@gitlab.com:idevlab/Essential.git", + "type": "git", + "reference": "b86767e1baf9ed9c218abcba963361876d55138a" + }, + "dist": { + "url": "https://gitlab.com/api/v4/projects/idevlab%2FEssential/repository/archive.zip?sha=b86767e1baf9ed9c218abcba963361876d55138a", + "type": "zip", + "shasum": "", + "reference": "b86767e1baf9ed9c218abcba963361876d55138a" + }, + "type": "library", + "time": "2022-08-17T21:34:05+00:00", + "autoload": { + "psr-4": { + "Idevlab\\Essential\\": "src/" + } + }, + "require": { + "php": "^8.1" + }, + "uid": 6622549 + } + } + } +} diff --git a/swh/lister/packagist/tests/data/ycms_module-main.json b/swh/lister/packagist/tests/data/ycms_module-main.json new file mode 100644 index 0000000..ae22e39 --- /dev/null +++ b/swh/lister/packagist/tests/data/ycms_module-main.json @@ -0,0 +1,41 @@ +{ + "packages": { + "ycms/module-main": { + "dev-master": { + "name": "ycms/module-main", + "description": "", + "keywords": [], + "homepage": "", + "version": "dev-master", + "version_normalized": "9999999-dev", + "license": [], + "authors": [ + { + "name": "YCMS Labs", + "email": "ycms.net@gmail.com" + } + ], + "source": { + "type": "git", + "url": "git@github.com:ycms/module-main.git", + "reference": "1173796881fbd7202009a68a2a59a5150bf2dbc6" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/ycms/module-main/zipball/1173796881fbd7202009a68a2a59a5150bf2dbc6", + "reference": "1173796881fbd7202009a68a2a59a5150bf2dbc6", + "shasum": "" + }, + "type": "wordpress-plugin", + "time": "2015-08-23T04:42:33+00:00", + "autoload": { + "psr-4": { + "YC\\Main\\": "" + } + }, + "default-branch": true, + "uid": 4064797 + } + } + } +} diff --git a/swh/lister/packagist/tests/test_lister.py b/swh/lister/packagist/tests/test_lister.py index 7b5919a..e2782ee 100644 --- a/swh/lister/packagist/tests/test_lister.py +++ b/swh/lister/packagist/tests/test_lister.py @@ -1,161 +1,199 @@ # Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import json from pathlib import Path from swh.lister.packagist.lister import PackagistLister _packages_list = { "packageNames": [ "ljjackson/linnworks", "lky/wx_article", "spryker-eco/computop-api", + "idevlab/essential", ] } def _package_metadata(datadir, package_name): return json.loads( Path(datadir, f"{package_name.replace('/', '_')}.json").read_text() ) def _request_without_if_modified_since(request): return request.headers.get("If-Modified-Since") is None def _request_with_if_modified_since(request): return request.headers.get("If-Modified-Since") is not None -def test_packagist_lister(swh_scheduler, requests_mock, datadir): +def test_packagist_lister(swh_scheduler, requests_mock, datadir, requests_mock_datadir): # first listing, should return one origin per package lister = PackagistLister(scheduler=swh_scheduler) requests_mock.get(lister.PACKAGIST_PACKAGES_LIST_URL, json=_packages_list) packages_metadata = {} for package_name in _packages_list["packageNames"]: metadata = _package_metadata(datadir, package_name) packages_metadata[package_name] = metadata requests_mock.get( f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", json=metadata, additional_matcher=_request_without_if_modified_since, ) stats = lister.run() assert stats.pages == 1 assert stats.origins == len(_packages_list["packageNames"]) assert lister.updated expected_origins = { ( - "https://github.com/gitlky/wx_article.git", + "https://github.com/gitlky/wx_article", # standard case "git", datetime.datetime.fromisoformat("2018-08-30T07:37:09+00:00"), ), ( - "https://github.com/ljjackson/linnworks.git", + "https://github.com/ljjackson/linnworks.git", # API goes 404 "git", datetime.datetime.fromisoformat("2018-11-01T21:45:50+00:00"), ), ( - "https://github.com/spryker-eco/computop-api.git", + "https://github.com/spryker-eco/computop-api", # SSH URL in manifest "git", datetime.datetime.fromisoformat("2020-06-22T15:50:29+00:00"), ), + ( + "git@gitlab.com:idevlab/Essential.git", # not GitHub + "git", + datetime.datetime.fromisoformat("2022-10-12T10:34:29+00:00"), + ), } assert expected_origins == { (o.url, o.visit_type, o.last_update) for o in swh_scheduler.get_listed_origins(lister.lister_obj.id).results } # second listing, should return 0 origins as no package metadata # has been updated since first listing lister = PackagistLister(scheduler=swh_scheduler) for package_name in _packages_list["packageNames"]: requests_mock.get( f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", additional_matcher=_request_with_if_modified_since, status_code=304, ) assert lister.get_state_from_scheduler().last_listing_date is not None stats = lister.run() assert stats.pages == 1 assert stats.origins == 0 assert lister.updated assert expected_origins == { (o.url, o.visit_type, o.last_update) for o in swh_scheduler.get_listed_origins(lister.lister_obj.id).results } def test_packagist_lister_missing_metadata(swh_scheduler, requests_mock, datadir): lister = PackagistLister(scheduler=swh_scheduler) requests_mock.get(lister.PACKAGIST_PACKAGES_LIST_URL, json=_packages_list) for package_name in _packages_list["packageNames"]: requests_mock.get( f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", additional_matcher=_request_without_if_modified_since, status_code=404, ) stats = lister.run() assert stats.pages == 1 assert stats.origins == 0 def test_packagist_lister_empty_metadata(swh_scheduler, requests_mock, datadir): lister = PackagistLister(scheduler=swh_scheduler) requests_mock.get(lister.PACKAGIST_PACKAGES_LIST_URL, json=_packages_list) for package_name in _packages_list["packageNames"]: requests_mock.get( f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", additional_matcher=_request_without_if_modified_since, json={"packages": {}}, ) stats = lister.run() assert stats.pages == 1 assert stats.origins == 0 def test_packagist_lister_package_with_bitbucket_hg_origin( swh_scheduler, requests_mock, datadir ): package_name = "den1n/contextmenu" lister = PackagistLister(scheduler=swh_scheduler) requests_mock.get( lister.PACKAGIST_PACKAGES_LIST_URL, json={"packageNames": [package_name]} ) requests_mock.get( f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", additional_matcher=_request_without_if_modified_since, json=_package_metadata(datadir, package_name), ) stats = lister.run() assert stats.pages == 1 assert stats.origins == 0 +def test_packagist_lister_package_normalize_github_origin( + swh_scheduler, requests_mock, datadir, requests_mock_datadir +): + package_name = "ycms/module-main" + lister = PackagistLister(scheduler=swh_scheduler) + requests_mock.get( + lister.PACKAGIST_PACKAGES_LIST_URL, json={"packageNames": [package_name]} + ) + requests_mock.get( + f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json", + additional_matcher=_request_without_if_modified_since, + json=_package_metadata(datadir, package_name), + ) + + stats = lister.run() + + assert stats.pages == 1 + assert stats.origins == 1 + + expected_origins = { + ( + "https://github.com/GameCHN/module-main", + "git", + datetime.datetime.fromisoformat("2015-08-23T04:42:33+00:00"), + ), + } + assert expected_origins == { + (o.url, o.visit_type, o.last_update) + for o in swh_scheduler.get_listed_origins(lister.lister_obj.id).results + } + + def test_lister_from_configfile(swh_scheduler_config, mocker): load_from_envvar = mocker.patch("swh.lister.pattern.load_from_envvar") load_from_envvar.return_value = { "scheduler": {"cls": "local", **swh_scheduler_config}, "credentials": {}, } lister = PackagistLister.from_configfile() assert lister.scheduler is not None assert lister.credentials is not None