diff --git a/swh/lister/maven/tests/data/https_api.github.com/repos_aldialimucaj_sprova4j b/swh/lister/maven/tests/data/https_api.github.com/repos_aldialimucaj_sprova4j new file mode 100644 index 0000000..8f5deb4 --- /dev/null +++ b/swh/lister/maven/tests/data/https_api.github.com/repos_aldialimucaj_sprova4j @@ -0,0 +1,110 @@ +{ + "id": 133475101, + "node_id": "MDEwOlJlcG9zaXRvcnkxMzM0NzUxMDE=", + "name": "sprova4j", + "full_name": "aldialimucaj/sprova4j", + "private": false, + "owner": { + "login": "aldialimucaj", + "id": 782755, + "node_id": "MDQ6VXNlcjc4Mjc1NQ==", + "avatar_url": "https://avatars.githubusercontent.com/u/782755?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/aldialimucaj", + "html_url": "https://github.com/aldialimucaj", + "followers_url": "https://api.github.com/users/aldialimucaj/followers", + "following_url": "https://api.github.com/users/aldialimucaj/following{/other_user}", + "gists_url": "https://api.github.com/users/aldialimucaj/gists{/gist_id}", + "starred_url": "https://api.github.com/users/aldialimucaj/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/aldialimucaj/subscriptions", + "organizations_url": "https://api.github.com/users/aldialimucaj/orgs", + "repos_url": "https://api.github.com/users/aldialimucaj/repos", + "events_url": "https://api.github.com/users/aldialimucaj/events{/privacy}", + "received_events_url": "https://api.github.com/users/aldialimucaj/received_events", + "type": "User", + "site_admin": false + }, + "html_url": "https://github.com/aldialimucaj/sprova4j", + "description": "Java client for Sprova Test Framework", + "fork": false, + "url": "https://api.github.com/repos/aldialimucaj/sprova4j", + "forks_url": "https://api.github.com/repos/aldialimucaj/sprova4j/forks", + "keys_url": "https://api.github.com/repos/aldialimucaj/sprova4j/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/aldialimucaj/sprova4j/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/aldialimucaj/sprova4j/teams", + "hooks_url": "https://api.github.com/repos/aldialimucaj/sprova4j/hooks", + "issue_events_url": "https://api.github.com/repos/aldialimucaj/sprova4j/issues/events{/number}", + "events_url": "https://api.github.com/repos/aldialimucaj/sprova4j/events", + "assignees_url": "https://api.github.com/repos/aldialimucaj/sprova4j/assignees{/user}", + "branches_url": "https://api.github.com/repos/aldialimucaj/sprova4j/branches{/branch}", + "tags_url": "https://api.github.com/repos/aldialimucaj/sprova4j/tags", + "blobs_url": "https://api.github.com/repos/aldialimucaj/sprova4j/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/aldialimucaj/sprova4j/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/aldialimucaj/sprova4j/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/aldialimucaj/sprova4j/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/aldialimucaj/sprova4j/statuses/{sha}", + "languages_url": "https://api.github.com/repos/aldialimucaj/sprova4j/languages", + "stargazers_url": "https://api.github.com/repos/aldialimucaj/sprova4j/stargazers", + "contributors_url": "https://api.github.com/repos/aldialimucaj/sprova4j/contributors", + "subscribers_url": "https://api.github.com/repos/aldialimucaj/sprova4j/subscribers", + "subscription_url": "https://api.github.com/repos/aldialimucaj/sprova4j/subscription", + "commits_url": "https://api.github.com/repos/aldialimucaj/sprova4j/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/aldialimucaj/sprova4j/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/aldialimucaj/sprova4j/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/aldialimucaj/sprova4j/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/aldialimucaj/sprova4j/contents/{+path}", + "compare_url": "https://api.github.com/repos/aldialimucaj/sprova4j/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/aldialimucaj/sprova4j/merges", + "archive_url": "https://api.github.com/repos/aldialimucaj/sprova4j/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/aldialimucaj/sprova4j/downloads", + "issues_url": "https://api.github.com/repos/aldialimucaj/sprova4j/issues{/number}", + "pulls_url": "https://api.github.com/repos/aldialimucaj/sprova4j/pulls{/number}", + "milestones_url": "https://api.github.com/repos/aldialimucaj/sprova4j/milestones{/number}", + "notifications_url": "https://api.github.com/repos/aldialimucaj/sprova4j/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/aldialimucaj/sprova4j/labels{/name}", + "releases_url": "https://api.github.com/repos/aldialimucaj/sprova4j/releases{/id}", + "deployments_url": "https://api.github.com/repos/aldialimucaj/sprova4j/deployments", + "created_at": "2018-05-15T07:15:09Z", + "updated_at": "2019-08-13T11:29:54Z", + "pushed_at": "2018-11-08T20:43:59Z", + "git_url": "git://github.com/aldialimucaj/sprova4j.git", + "ssh_url": "git@github.com:aldialimucaj/sprova4j.git", + "clone_url": "https://github.com/aldialimucaj/sprova4j.git", + "svn_url": "https://github.com/aldialimucaj/sprova4j", + "homepage": null, + "size": 87, + "stargazers_count": 2, + "watchers_count": 2, + "language": "Java", + "has_issues": true, + "has_projects": true, + "has_downloads": true, + "has_wiki": true, + "has_pages": false, + "forks_count": 0, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 0, + "license": { + "key": "apache-2.0", + "name": "Apache License 2.0", + "spdx_id": "Apache-2.0", + "url": "https://api.github.com/licenses/apache-2.0", + "node_id": "MDc6TGljZW5zZTI=" + }, + "allow_forking": true, + "is_template": false, + "web_commit_signoff_required": false, + "topics": [ + + ], + "visibility": "public", + "forks": 0, + "open_issues": 0, + "watchers": 2, + "default_branch": "master", + "temp_clone_token": null, + "network_count": 0, + "subscribers_count": 1 +} diff --git a/swh/lister/maven/tests/data/https_api.github.com/repos_arangodb-community_arangodb-graphql-java b/swh/lister/maven/tests/data/https_api.github.com/repos_arangodb-community_arangodb-graphql-java new file mode 100644 index 0000000..3342375 --- /dev/null +++ b/swh/lister/maven/tests/data/https_api.github.com/repos_arangodb-community_arangodb-graphql-java @@ -0,0 +1,130 @@ +{ + "id": 203772666, + "node_id": "MDEwOlJlcG9zaXRvcnkyMDM3NzI2NjY=", + "name": "arangodb-graphql-java", + "full_name": "ArangoDB-Community/arangodb-graphql-java", + "private": false, + "owner": { + "login": "ArangoDB-Community", + "id": 37540306, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjM3NTQwMzA2", + "avatar_url": "https://avatars.githubusercontent.com/u/37540306?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/ArangoDB-Community", + "html_url": "https://github.com/ArangoDB-Community", + "followers_url": "https://api.github.com/users/ArangoDB-Community/followers", + "following_url": "https://api.github.com/users/ArangoDB-Community/following{/other_user}", + "gists_url": "https://api.github.com/users/ArangoDB-Community/gists{/gist_id}", + "starred_url": "https://api.github.com/users/ArangoDB-Community/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ArangoDB-Community/subscriptions", + "organizations_url": "https://api.github.com/users/ArangoDB-Community/orgs", + "repos_url": "https://api.github.com/users/ArangoDB-Community/repos", + "events_url": "https://api.github.com/users/ArangoDB-Community/events{/privacy}", + "received_events_url": "https://api.github.com/users/ArangoDB-Community/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/ArangoDB-Community/arangodb-graphql-java", + "description": null, + "fork": false, + "url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java", + "forks_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/forks", + "keys_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/teams", + "hooks_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/hooks", + "issue_events_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/issues/events{/number}", + "events_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/events", + "assignees_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/assignees{/user}", + "branches_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/branches{/branch}", + "tags_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/tags", + "blobs_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/statuses/{sha}", + "languages_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/languages", + "stargazers_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/stargazers", + "contributors_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/contributors", + "subscribers_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/subscribers", + "subscription_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/subscription", + "commits_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/contents/{+path}", + "compare_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/merges", + "archive_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/downloads", + "issues_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/issues{/number}", + "pulls_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/pulls{/number}", + "milestones_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/milestones{/number}", + "notifications_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/labels{/name}", + "releases_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/releases{/id}", + "deployments_url": "https://api.github.com/repos/ArangoDB-Community/arangodb-graphql-java/deployments", + "created_at": "2019-08-22T10:38:53Z", + "updated_at": "2022-05-03T21:29:48Z", + "pushed_at": "2022-09-16T21:06:34Z", + "git_url": "git://github.com/ArangoDB-Community/arangodb-graphql-java.git", + "ssh_url": "git@github.com:ArangoDB-Community/arangodb-graphql-java.git", + "clone_url": "https://github.com/ArangoDB-Community/arangodb-graphql-java.git", + "svn_url": "https://github.com/ArangoDB-Community/arangodb-graphql-java", + "homepage": null, + "size": 91, + "stargazers_count": 5, + "watchers_count": 5, + "language": "Java", + "has_issues": true, + "has_projects": false, + "has_downloads": true, + "has_wiki": false, + "has_pages": false, + "forks_count": 1, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 1, + "license": { + "key": "apache-2.0", + "name": "Apache License 2.0", + "spdx_id": "Apache-2.0", + "url": "https://api.github.com/licenses/apache-2.0", + "node_id": "MDc6TGljZW5zZTI=" + }, + "allow_forking": true, + "is_template": false, + "web_commit_signoff_required": false, + "topics": [ + + ], + "visibility": "public", + "forks": 1, + "open_issues": 1, + "watchers": 5, + "default_branch": "master", + "temp_clone_token": null, + "organization": { + "login": "ArangoDB-Community", + "id": 37540306, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjM3NTQwMzA2", + "avatar_url": "https://avatars.githubusercontent.com/u/37540306?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/ArangoDB-Community", + "html_url": "https://github.com/ArangoDB-Community", + "followers_url": "https://api.github.com/users/ArangoDB-Community/followers", + "following_url": "https://api.github.com/users/ArangoDB-Community/following{/other_user}", + "gists_url": "https://api.github.com/users/ArangoDB-Community/gists{/gist_id}", + "starred_url": "https://api.github.com/users/ArangoDB-Community/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/ArangoDB-Community/subscriptions", + "organizations_url": "https://api.github.com/users/ArangoDB-Community/orgs", + "repos_url": "https://api.github.com/users/ArangoDB-Community/repos", + "events_url": "https://api.github.com/users/ArangoDB-Community/events{/privacy}", + "received_events_url": "https://api.github.com/users/ArangoDB-Community/received_events", + "type": "Organization", + "site_admin": false + }, + "network_count": 1, + "subscribers_count": 17 +} diff --git a/swh/lister/maven/tests/data/https_api.github.com/repos_webx_citrus b/swh/lister/maven/tests/data/https_api.github.com/repos_webx_citrus new file mode 100644 index 0000000..c731849 --- /dev/null +++ b/swh/lister/maven/tests/data/https_api.github.com/repos_webx_citrus @@ -0,0 +1,124 @@ +{ + "id": 2618757, + "node_id": "MDEwOlJlcG9zaXRvcnkyNjE4NzU3", + "name": "citrus", + "full_name": "webx/citrus", + "private": false, + "owner": { + "login": "webx", + "id": 1142574, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjExNDI1NzQ=", + "avatar_url": "https://avatars.githubusercontent.com/u/1142574?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/webx", + "html_url": "https://github.com/webx", + "followers_url": "https://api.github.com/users/webx/followers", + "following_url": "https://api.github.com/users/webx/following{/other_user}", + "gists_url": "https://api.github.com/users/webx/gists{/gist_id}", + "starred_url": "https://api.github.com/users/webx/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/webx/subscriptions", + "organizations_url": "https://api.github.com/users/webx/orgs", + "repos_url": "https://api.github.com/users/webx/repos", + "events_url": "https://api.github.com/users/webx/events{/privacy}", + "received_events_url": "https://api.github.com/users/webx/received_events", + "type": "Organization", + "site_admin": false + }, + "html_url": "https://github.com/webx/citrus", + "description": "Java-based Web Framework for Alibaba Group", + "fork": false, + "url": "https://api.github.com/repos/webx/citrus", + "forks_url": "https://api.github.com/repos/webx/citrus/forks", + "keys_url": "https://api.github.com/repos/webx/citrus/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/webx/citrus/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/webx/citrus/teams", + "hooks_url": "https://api.github.com/repos/webx/citrus/hooks", + "issue_events_url": "https://api.github.com/repos/webx/citrus/issues/events{/number}", + "events_url": "https://api.github.com/repos/webx/citrus/events", + "assignees_url": "https://api.github.com/repos/webx/citrus/assignees{/user}", + "branches_url": "https://api.github.com/repos/webx/citrus/branches{/branch}", + "tags_url": "https://api.github.com/repos/webx/citrus/tags", + "blobs_url": "https://api.github.com/repos/webx/citrus/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/webx/citrus/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/webx/citrus/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/webx/citrus/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/webx/citrus/statuses/{sha}", + "languages_url": "https://api.github.com/repos/webx/citrus/languages", + "stargazers_url": "https://api.github.com/repos/webx/citrus/stargazers", + "contributors_url": "https://api.github.com/repos/webx/citrus/contributors", + "subscribers_url": "https://api.github.com/repos/webx/citrus/subscribers", + "subscription_url": "https://api.github.com/repos/webx/citrus/subscription", + "commits_url": "https://api.github.com/repos/webx/citrus/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/webx/citrus/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/webx/citrus/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/webx/citrus/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/webx/citrus/contents/{+path}", + "compare_url": "https://api.github.com/repos/webx/citrus/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/webx/citrus/merges", + "archive_url": "https://api.github.com/repos/webx/citrus/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/webx/citrus/downloads", + "issues_url": "https://api.github.com/repos/webx/citrus/issues{/number}", + "pulls_url": "https://api.github.com/repos/webx/citrus/pulls{/number}", + "milestones_url": "https://api.github.com/repos/webx/citrus/milestones{/number}", + "notifications_url": "https://api.github.com/repos/webx/citrus/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/webx/citrus/labels{/name}", + "releases_url": "https://api.github.com/repos/webx/citrus/releases{/id}", + "deployments_url": "https://api.github.com/repos/webx/citrus/deployments", + "created_at": "2011-10-21T07:34:55Z", + "updated_at": "2022-10-12T13:24:54Z", + "pushed_at": "2022-10-10T02:41:17Z", + "git_url": "git://github.com/webx/citrus.git", + "ssh_url": "git@github.com:webx/citrus.git", + "clone_url": "https://github.com/webx/citrus.git", + "svn_url": "https://github.com/webx/citrus", + "homepage": "http://www.openwebx.org/", + "size": 7512, + "stargazers_count": 756, + "watchers_count": 756, + "language": "Java", + "has_issues": true, + "has_projects": true, + "has_downloads": true, + "has_wiki": true, + "has_pages": false, + "forks_count": 376, + "mirror_url": null, + "archived": false, + "disabled": false, + "open_issues_count": 32, + "license": null, + "allow_forking": true, + "is_template": false, + "web_commit_signoff_required": false, + "topics": [ + + ], + "visibility": "public", + "forks": 376, + "open_issues": 32, + "watchers": 756, + "default_branch": "master", + "temp_clone_token": null, + "organization": { + "login": "webx", + "id": 1142574, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjExNDI1NzQ=", + "avatar_url": "https://avatars.githubusercontent.com/u/1142574?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/webx", + "html_url": "https://github.com/webx", + "followers_url": "https://api.github.com/users/webx/followers", + "following_url": "https://api.github.com/users/webx/following{/other_user}", + "gists_url": "https://api.github.com/users/webx/gists{/gist_id}", + "starred_url": "https://api.github.com/users/webx/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/webx/subscriptions", + "organizations_url": "https://api.github.com/users/webx/orgs", + "repos_url": "https://api.github.com/users/webx/repos", + "events_url": "https://api.github.com/users/webx/events{/privacy}", + "received_events_url": "https://api.github.com/users/webx/received_events", + "type": "Organization", + "site_admin": false + }, + "network_count": 376, + "subscribers_count": 136 +} diff --git a/swh/lister/maven/tests/test_lister.py b/swh/lister/maven/tests/test_lister.py index 9936a9b..9bacd4e 100644 --- a/swh/lister/maven/tests/test_lister.py +++ b/swh/lister/maven/tests/test_lister.py @@ -1,359 +1,348 @@ # Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from pathlib import Path import iso8601 import pytest import requests from swh.lister.maven.lister import MavenLister MVN_URL = "https://repo1.maven.org/maven2/" # main maven repo url INDEX_URL = "http://indexes/export.fld" # index directory url URL_POM_1 = MVN_URL + "al/aldi/sprova4j/0.1.0/sprova4j-0.1.0.pom" URL_POM_2 = MVN_URL + "al/aldi/sprova4j/0.1.1/sprova4j-0.1.1.pom" URL_POM_3 = MVN_URL + "com/arangodb/arangodb-graphql/1.2/arangodb-graphql-1.2.pom" USER_REPO0 = "aldialimucaj/sprova4j" GIT_REPO_URL0_HTTPS = f"https://github.com/{USER_REPO0}" GIT_REPO_URL0_API = f"https://api.github.com/repos/{USER_REPO0}" ORIGIN_GIT = GIT_REPO_URL0_HTTPS USER_REPO1 = "ArangoDB-Community/arangodb-graphql-java" GIT_REPO_URL1_HTTPS = f"https://github.com/{USER_REPO1}" GIT_REPO_URL1_GIT = f"git://github.com/{USER_REPO1}.git" GIT_REPO_URL1_API = f"https://api.github.com/repos/{USER_REPO1}" ORIGIN_GIT_INCR = GIT_REPO_URL1_HTTPS USER_REPO2 = "webx/citrus" GIT_REPO_URL2_HTTPS = f"https://github.com/{USER_REPO2}" GIT_REPO_URL2_API = f"https://api.github.com/repos/{USER_REPO2}" ORIGIN_SRC = MVN_URL + "al/aldi/sprova4j" LIST_SRC_DATA = ( { "type": "maven", "url": "https://repo1.maven.org/maven2/al/aldi/sprova4j" + "/0.1.0/sprova4j-0.1.0-sources.jar", "time": "2021-07-12T17:06:59+00:00", "gid": "al.aldi", "aid": "sprova4j", "version": "0.1.0", "base_url": MVN_URL, }, { "type": "maven", "url": "https://repo1.maven.org/maven2/al/aldi/sprova4j" + "/0.1.1/sprova4j-0.1.1-sources.jar", "time": "2021-07-12T17:37:05+00:00", "gid": "al.aldi", "aid": "sprova4j", "version": "0.1.1", "base_url": MVN_URL, }, ) @pytest.fixture def maven_index_full(datadir) -> bytes: return Path(datadir, "http_indexes", "export_full.fld").read_bytes() @pytest.fixture def maven_index_incr_first(datadir) -> bytes: return Path(datadir, "http_indexes", "export_incr_first.fld").read_bytes() @pytest.fixture def maven_index_null_mtime(datadir) -> bytes: return Path(datadir, "http_indexes", "export_null_mtime.fld").read_bytes() @pytest.fixture(autouse=True) def network_requests_mock(requests_mock, requests_mock_datadir, maven_index_full): - # If github api calls for the configured scm repository, returns its canonical url. - for url_api, url_html in [ - (GIT_REPO_URL0_API, GIT_REPO_URL0_HTTPS), - (GIT_REPO_URL1_API, GIT_REPO_URL1_HTTPS), - (GIT_REPO_URL2_API, GIT_REPO_URL2_HTTPS), - ]: - requests_mock.get( - url_api, - json={"html_url": url_html}, - ) - requests_mock.get(INDEX_URL, content=maven_index_full) @pytest.fixture(autouse=True) def retry_sleep_mock(mocker): mocker.patch.object(MavenLister.http_request.retry, "sleep") def test_maven_full_listing(swh_scheduler): """Covers full listing of multiple pages, checking page results and listed origins, statelessness.""" # Run the lister. lister = MavenLister( scheduler=swh_scheduler, url=MVN_URL, instance="maven.org", index_url=INDEX_URL, incremental=False, ) stats = lister.run() # Start test checks. assert stats.pages == 5 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results origin_urls = [origin.url for origin in scheduler_origins] # 3 git origins + 1 maven origin with 2 releases (one per jar) assert set(origin_urls) == {ORIGIN_GIT, ORIGIN_GIT_INCR, ORIGIN_SRC} assert len(set(origin_urls)) == len(origin_urls) for origin in scheduler_origins: if origin.visit_type == "maven": for src in LIST_SRC_DATA: last_update_src = iso8601.parse_date(src["time"]) assert last_update_src <= origin.last_update assert origin.extra_loader_arguments["artifacts"] == list(LIST_SRC_DATA) scheduler_state = lister.get_state_from_scheduler() assert scheduler_state is not None assert scheduler_state.last_seen_doc == -1 assert scheduler_state.last_seen_pom == -1 def test_maven_full_listing_malformed( swh_scheduler, requests_mock, datadir, ): """Covers full listing of multiple pages, checking page results with a malformed scm entry in pom.""" lister = MavenLister( scheduler=swh_scheduler, url=MVN_URL, instance="maven.org", index_url=INDEX_URL, incremental=False, ) # Set up test. requests_mock.get( URL_POM_1, content=Path(datadir, "sprova4j-0.1.0.malformed.pom").read_bytes() ) # Then run the lister. stats = lister.run() # Start test checks. assert stats.pages == 5 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results origin_urls = [origin.url for origin in scheduler_origins] # 2 git origins + 1 maven origin with 2 releases (one per jar) assert set(origin_urls) == {ORIGIN_GIT, ORIGIN_GIT_INCR, ORIGIN_SRC} assert len(origin_urls) == len(set(origin_urls)) for origin in scheduler_origins: if origin.visit_type == "maven": for src in LIST_SRC_DATA: last_update_src = iso8601.parse_date(src["time"]) assert last_update_src <= origin.last_update assert origin.extra_loader_arguments["artifacts"] == list(LIST_SRC_DATA) scheduler_state = lister.get_state_from_scheduler() assert scheduler_state is not None assert scheduler_state.last_seen_doc == -1 assert scheduler_state.last_seen_pom == -1 def test_maven_incremental_listing( swh_scheduler, requests_mock, maven_index_full, maven_index_incr_first, ): """Covers full listing of multiple pages, checking page results and listed origins, with a second updated run for statefulness.""" lister = MavenLister( scheduler=swh_scheduler, url=MVN_URL, instance="maven.org", index_url=INDEX_URL, incremental=True, ) # Set up test. requests_mock.get(INDEX_URL, content=maven_index_incr_first) # Then run the lister. stats = lister.run() # Start test checks. assert lister.incremental assert lister.updated assert stats.pages == 2 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results origin_urls = [origin.url for origin in scheduler_origins] # 1 git origins + 1 maven origin with 1 release (one per jar) assert set(origin_urls) == {ORIGIN_GIT, ORIGIN_SRC} assert len(origin_urls) == len(set(origin_urls)) for origin in scheduler_origins: if origin.visit_type == "maven": last_update_src = iso8601.parse_date(LIST_SRC_DATA[0]["time"]) assert last_update_src == origin.last_update assert origin.extra_loader_arguments["artifacts"] == [LIST_SRC_DATA[0]] # Second execution of the lister, incremental mode lister = MavenLister( scheduler=swh_scheduler, url=MVN_URL, instance="maven.org", index_url=INDEX_URL, incremental=True, ) scheduler_state = lister.get_state_from_scheduler() assert scheduler_state is not None assert scheduler_state.last_seen_doc == 1 assert scheduler_state.last_seen_pom == 1 # Set up test. requests_mock.get(INDEX_URL, content=maven_index_full) # Then run the lister. stats = lister.run() # Start test checks. assert lister.incremental assert lister.updated assert stats.pages == 4 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results origin_urls = [origin.url for origin in scheduler_origins] assert set(origin_urls) == {ORIGIN_SRC, ORIGIN_GIT, ORIGIN_GIT_INCR} assert len(origin_urls) == len(set(origin_urls)) for origin in scheduler_origins: if origin.visit_type == "maven": for src in LIST_SRC_DATA: last_update_src = iso8601.parse_date(src["time"]) assert last_update_src <= origin.last_update assert origin.extra_loader_arguments["artifacts"] == list(LIST_SRC_DATA) scheduler_state = lister.get_state_from_scheduler() assert scheduler_state is not None assert scheduler_state.last_seen_doc == 4 assert scheduler_state.last_seen_pom == 4 @pytest.mark.parametrize("http_code", [400, 404, 500, 502]) def test_maven_list_http_error_on_index_read(swh_scheduler, requests_mock, http_code): """should stop listing if the lister fails to retrieve the main index url.""" lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL) requests_mock.get(INDEX_URL, status_code=http_code) with pytest.raises(requests.HTTPError): # listing cannot continues so stop lister.run() scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == 0 @pytest.mark.parametrize("http_code", [400, 404, 500, 502]) def test_maven_list_http_error_artifacts( swh_scheduler, requests_mock, http_code, ): """should continue listing when failing to retrieve artifacts.""" # Test failure of artefacts retrieval. requests_mock.get(URL_POM_1, status_code=http_code) lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL) # on artifacts though, that raises but continue listing lister.run() # If the maven_index_full step succeeded but not the get_pom step, # then we get only one maven-jar origin and one git origin. scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results origin_urls = [origin.url for origin in scheduler_origins] assert set(origin_urls) == {ORIGIN_SRC, ORIGIN_GIT_INCR} assert len(origin_urls) == len(set(origin_urls)) def test_maven_lister_null_mtime(swh_scheduler, requests_mock, maven_index_null_mtime): requests_mock.get(INDEX_URL, content=maven_index_null_mtime) # Run the lister. lister = MavenLister( scheduler=swh_scheduler, url=MVN_URL, instance="maven.org", index_url=INDEX_URL, incremental=False, ) stats = lister.run() # Start test checks. assert stats.pages == 1 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == 1 assert scheduler_origins[0].last_update is None def test_maven_list_pom_bad_encoding(swh_scheduler, requests_mock): """should continue listing when failing to decode pom file.""" # Test failure of pom parsing by reencoding a UTF-8 pom file to a not expected one requests_mock.get( URL_POM_1, content=requests.get(URL_POM_1).content.decode("utf-8").encode("utf-32"), ) lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL) lister.run() # If the maven_index_full step succeeded but not the pom parsing step, # then we get only one maven-jar origin and one git origin. scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == 2 def test_maven_list_pom_multi_byte_encoding(swh_scheduler, requests_mock, datadir): """should parse POM file with multi-byte encoding.""" # replace pom file with a multi-byte encoding one requests_mock.get( URL_POM_1, content=Path(datadir, "citrus-parent-3.0.7.pom").read_bytes() ) lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL) lister.run() scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == 3