diff --git a/swh/lister/pubdev/lister.py b/swh/lister/pubdev/lister.py --- a/swh/lister/pubdev/lister.py +++ b/swh/lister/pubdev/lister.py @@ -5,7 +5,9 @@ import logging from typing import Any, Dict, Iterator, List, Optional +import iso8601 import requests +from requests.exceptions import HTTPError from tenacity.before_sleep import before_sleep_log from swh.lister.utils import throttling_retry @@ -90,6 +92,22 @@ assert self.lister_obj.id is not None for pkgname in page: + package_info_url = self.PACKAGE_INFO_URL_PATTERN.format( + base_url=self.url, pkgname=pkgname + ) + try: + response = self.page_request(url=package_info_url, params={}) + except HTTPError: + logger.warning( + "Failed to fetch metadata for package %s, skipping it from listing.", + pkgname, + ) + continue + package_metadata = response.json() + package_versions = package_metadata["versions"] + last_published = max( + package_version["published"] for package_version in package_versions + ) origin_url = self.ORIGIN_URL_PATTERN.format( base_url=self.url, pkgname=pkgname ) @@ -97,5 +115,5 @@ lister_id=self.lister_obj.id, visit_type=self.VISIT_TYPE, url=origin_url, - last_update=None, + last_update=iso8601.parse_date(last_published), ) diff --git a/swh/lister/pubdev/tests/data/https_pub.dev/api_packages_Autolinker b/swh/lister/pubdev/tests/data/https_pub.dev/api_packages_Autolinker new file mode 100644 --- /dev/null +++ b/swh/lister/pubdev/tests/data/https_pub.dev/api_packages_Autolinker @@ -0,0 +1,44 @@ +{ + "name": "Autolinker", + "latest": { + "version": "0.1.1", + "pubspec": { + "version": "0.1.1", + "homepage": "https://github.com/hackcave", + "description": "Port of Autolinker.js to dart", + "name": "Autolinker", + "author": "hackcave " + }, + "archive_url": "https://pub.dartlang.org/packages/Autolinker/versions/0.1.1.tar.gz", + "archive_sha256": "0a5209a2d5a292a26fc65d7edb430163f209a7c7c24ba4f301676f1afd79fa3f", + "published": "2014-12-24T22:34:02.534090Z" + }, + "versions": [ + { + "version": "0.1.0", + "pubspec": { + "version": "0.1.0", + "homepage": "https://github.com/hackcave", + "description": "Port of Autolinker.js to dart", + "name": "Autolinker", + "author": "hackcave " + }, + "archive_url": "https://pub.dartlang.org/packages/Autolinker/versions/0.1.0.tar.gz", + "archive_sha256": "717b30e27311c775293d4795ce33d15cedb5e5d21fa140f2cb46b30f3e969041", + "published": "2014-12-24T21:16:03.118270Z" + }, + { + "version": "0.1.1", + "pubspec": { + "version": "0.1.1", + "homepage": "https://github.com/hackcave", + "description": "Port of Autolinker.js to dart", + "name": "Autolinker", + "author": "hackcave " + }, + "archive_url": "https://pub.dartlang.org/packages/Autolinker/versions/0.1.1.tar.gz", + "archive_sha256": "0a5209a2d5a292a26fc65d7edb430163f209a7c7c24ba4f301676f1afd79fa3f", + "published": "2014-12-24T22:34:02.534090Z" + } + ] +} \ No newline at end of file diff --git a/swh/lister/pubdev/tests/data/https_pub.dev/api_packages_Babylon b/swh/lister/pubdev/tests/data/https_pub.dev/api_packages_Babylon new file mode 100644 --- /dev/null +++ b/swh/lister/pubdev/tests/data/https_pub.dev/api_packages_Babylon @@ -0,0 +1,51 @@ +{ + "name": "Babylon", + "latest": { + "version": "0.0.3", + "pubspec": { + "version": "0.0.3", + "name": "Babylon", + "dependencies": { + "js": ">=0.6.0", + "browser": ">=0.10.0+2" + }, + "author": "Cedric Krause ", + "description": "A starting point for Dart libraries or applications.", + "homepage": "https://www.cedware.com", + "environment": { + "sdk": ">=1.0.0 <2.0.0" + }, + "dev_dependencies": { + "test": ">=0.12.0 <0.13.0" + } + }, + "archive_url": "https://pub.dartlang.org/packages/Babylon/versions/0.0.3.tar.gz", + "archive_sha256": "a18166c8082d795f22c38270b7fed0c306d5cb59fe390ce3a34c300770c4a8b3", + "published": "2016-06-01T19:15:38.052Z" + }, + "versions": [ + { + "version": "0.0.3", + "pubspec": { + "version": "0.0.3", + "name": "Babylon", + "dependencies": { + "js": ">=0.6.0", + "browser": ">=0.10.0+2" + }, + "author": "Cedric Krause ", + "description": "A starting point for Dart libraries or applications.", + "homepage": "https://www.cedware.com", + "environment": { + "sdk": ">=1.0.0 <2.0.0" + }, + "dev_dependencies": { + "test": ">=0.12.0 <0.13.0" + } + }, + "archive_url": "https://pub.dartlang.org/packages/Babylon/versions/0.0.3.tar.gz", + "archive_sha256": "a18166c8082d795f22c38270b7fed0c306d5cb59fe390ce3a34c300770c4a8b3", + "published": "2016-06-01T19:15:38.052Z" + } + ] +} \ No newline at end of file diff --git a/swh/lister/pubdev/tests/test_lister.py b/swh/lister/pubdev/tests/test_lister.py --- a/swh/lister/pubdev/tests/test_lister.py +++ b/swh/lister/pubdev/tests/test_lister.py @@ -25,3 +25,17 @@ for origin in scheduler_origins: assert origin.visit_type == "pubdev" assert origin.url in expected_origins + assert origin.last_update is not None + + +def test_pubdev_lister_skip_package( + datadir, requests_mock_datadir, swh_scheduler, requests_mock +): + + requests_mock.get("https://pub.dev/api/packages/Autolinker", status_code=404) + + lister = PubDevLister(scheduler=swh_scheduler) + res = lister.run() + + assert res.pages == 1 + assert res.origins == 1