diff --git a/swh/lister/pubdev/lister.py b/swh/lister/pubdev/lister.py --- a/swh/lister/pubdev/lister.py +++ b/swh/lister/pubdev/lister.py @@ -5,7 +5,9 @@ import logging from typing import Any, Dict, Iterator, List, Optional +import iso8601 import requests +from requests.exceptions import HTTPError from tenacity.before_sleep import before_sleep_log from swh.lister.utils import throttling_retry @@ -31,6 +33,7 @@ BASE_URL = "https://pub.dev/api/" PACKAGE_NAMES_URL_PATTERN = "{base_url}package-names" PACKAGE_INFO_URL_PATTERN = "{base_url}packages/{pkgname}" + ORIGIN_URL_PATTERN = "https://pub.dev/packages/{pkgname}" def __init__( self, @@ -89,12 +92,27 @@ assert self.lister_obj.id is not None for pkgname in page: - url = self.PACKAGE_INFO_URL_PATTERN.format( + info_url = self.PACKAGE_INFO_URL_PATTERN.format( base_url=self.url, pkgname=pkgname ) + try: + response = self.page_request(url=info_url, params={}) + except HTTPError: + logger.warning( + "Failed to fetch metadata for package %s, skipping it from listing.", + pkgname, + ) + continue + package_metadata = response.json() + package_versions = package_metadata["versions"] + last_published = max( + package_version["published"] for package_version in package_versions + ) + origin_url = self.ORIGIN_URL_PATTERN.format(pkgname=pkgname) yield ListedOrigin( lister_id=self.lister_obj.id, visit_type=self.VISIT_TYPE, - url=url, - last_update=None, + url=origin_url, + last_update=iso8601.parse_date(last_published), + extra_loader_arguments={"package_versions": package_metadata}, ) diff --git a/swh/lister/pubdev/tests/data/https_pub.dev/api_package-names b/swh/lister/pubdev/tests/data/https_pub.dev/api_package-names --- a/swh/lister/pubdev/tests/data/https_pub.dev/api_package-names +++ b/swh/lister/pubdev/tests/data/https_pub.dev/api_package-names @@ -1 +1,7 @@ -{"packages":["Autolinker","pdf"],"nextUrl":null} +{ + "packages": [ + "Autolinker", + "Babylon" + ], + "nextUrl": null +} \ No newline at end of file diff --git a/swh/lister/pubdev/tests/data/https_pub.dev/api_packages_Autolinker b/swh/lister/pubdev/tests/data/https_pub.dev/api_packages_Autolinker new file mode 100644 --- /dev/null +++ b/swh/lister/pubdev/tests/data/https_pub.dev/api_packages_Autolinker @@ -0,0 +1,44 @@ +{ + "name": "Autolinker", + "latest": { + "version": "0.1.1", + "pubspec": { + "version": "0.1.1", + "homepage": "https://github.com/hackcave", + "description": "Port of Autolinker.js to dart", + "name": "Autolinker", + "author": "hackcave " + }, + "archive_url": "https://pub.dartlang.org/packages/Autolinker/versions/0.1.1.tar.gz", + "archive_sha256": "0a5209a2d5a292a26fc65d7edb430163f209a7c7c24ba4f301676f1afd79fa3f", + "published": "2014-12-24T22:34:02.534090Z" + }, + "versions": [ + { + "version": "0.1.0", + "pubspec": { + "version": "0.1.0", + "homepage": "https://github.com/hackcave", + "description": "Port of Autolinker.js to dart", + "name": "Autolinker", + "author": "hackcave " + }, + "archive_url": "https://pub.dartlang.org/packages/Autolinker/versions/0.1.0.tar.gz", + "archive_sha256": "717b30e27311c775293d4795ce33d15cedb5e5d21fa140f2cb46b30f3e969041", + "published": "2014-12-24T21:16:03.118270Z" + }, + { + "version": "0.1.1", + "pubspec": { + "version": "0.1.1", + "homepage": "https://github.com/hackcave", + "description": "Port of Autolinker.js to dart", + "name": "Autolinker", + "author": "hackcave " + }, + "archive_url": "https://pub.dartlang.org/packages/Autolinker/versions/0.1.1.tar.gz", + "archive_sha256": "0a5209a2d5a292a26fc65d7edb430163f209a7c7c24ba4f301676f1afd79fa3f", + "published": "2014-12-24T22:34:02.534090Z" + } + ] +} \ No newline at end of file diff --git a/swh/lister/pubdev/tests/data/https_pub.dev/api_packages_Babylon b/swh/lister/pubdev/tests/data/https_pub.dev/api_packages_Babylon new file mode 100644 --- /dev/null +++ b/swh/lister/pubdev/tests/data/https_pub.dev/api_packages_Babylon @@ -0,0 +1,51 @@ +{ + "name": "Babylon", + "latest": { + "version": "0.0.3", + "pubspec": { + "version": "0.0.3", + "name": "Babylon", + "dependencies": { + "js": ">=0.6.0", + "browser": ">=0.10.0+2" + }, + "author": "Cedric Krause ", + "description": "A starting point for Dart libraries or applications.", + "homepage": "https://www.cedware.com", + "environment": { + "sdk": ">=1.0.0 <2.0.0" + }, + "dev_dependencies": { + "test": ">=0.12.0 <0.13.0" + } + }, + "archive_url": "https://pub.dartlang.org/packages/Babylon/versions/0.0.3.tar.gz", + "archive_sha256": "a18166c8082d795f22c38270b7fed0c306d5cb59fe390ce3a34c300770c4a8b3", + "published": "2016-06-01T19:15:38.052Z" + }, + "versions": [ + { + "version": "0.0.3", + "pubspec": { + "version": "0.0.3", + "name": "Babylon", + "dependencies": { + "js": ">=0.6.0", + "browser": ">=0.10.0+2" + }, + "author": "Cedric Krause ", + "description": "A starting point for Dart libraries or applications.", + "homepage": "https://www.cedware.com", + "environment": { + "sdk": ">=1.0.0 <2.0.0" + }, + "dev_dependencies": { + "test": ">=0.12.0 <0.13.0" + } + }, + "archive_url": "https://pub.dartlang.org/packages/Babylon/versions/0.0.3.tar.gz", + "archive_sha256": "a18166c8082d795f22c38270b7fed0c306d5cb59fe390ce3a34c300770c4a8b3", + "published": "2016-06-01T19:15:38.052Z" + } + ] +} \ No newline at end of file diff --git a/swh/lister/pubdev/tests/test_lister.py b/swh/lister/pubdev/tests/test_lister.py --- a/swh/lister/pubdev/tests/test_lister.py +++ b/swh/lister/pubdev/tests/test_lister.py @@ -2,16 +2,13 @@ # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information + from swh.lister.pubdev.lister import PubDevLister -expected_origins = [ - { - "url": "https://pub.dev/api/packages/Autolinker", - }, - { - "url": "https://pub.dev/api/packages/pdf", - }, -] +expected_origins = { + "https://pub.dev/packages/Autolinker", + "https://pub.dev/packages/Babylon", +} def test_pubdev_lister(datadir, requests_mock_datadir, swh_scheduler): @@ -19,22 +16,31 @@ res = lister.run() assert res.pages == 1 - assert res.origins == 1 + 1 + assert res.origins == 2 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == len(expected_origins) - assert { - ( - scheduled.visit_type, - scheduled.url, - ) - for scheduled in scheduler_origins - } == { - ( - "pubdev", - expected["url"], + for origin in scheduler_origins: + assert origin.visit_type == "pubdev" + assert origin.url in expected_origins + assert origin.last_update is not None + assert origin.extra_loader_arguments + assert "package_versions" in origin.extra_loader_arguments + assert origin.url.endswith( + origin.extra_loader_arguments["package_versions"]["name"] ) - for expected in expected_origins - } + + +def test_pubdev_lister_skip_package( + datadir, requests_mock_datadir, swh_scheduler, requests_mock +): + + requests_mock.get("https://pub.dev/api/packages/Autolinker", status_code=404) + + lister = PubDevLister(scheduler=swh_scheduler) + res = lister.run() + + assert res.pages == 1 + assert res.origins == 1