# Patch summary (commentary placed before the first `diff --git` header):
#
# swh/lister/pubdev/lister.py
#   - Stops importing USER_AGENT from the parent package and imports
#     __version__ instead.
#   - Defines a module-level USER_AGENT string built from __version__ and the
#     Software Heritage contact URL; the accompanying comment links to the
#     "metadata-headers" section of the pub repository spec v2.
#
# swh/lister/pubdev/tests/test_lister.py
#   - Imports USER_AGENT alongside PubDevLister.
#   - Adds _match_request(request), which returns True only when the request's
#     "User-Agent" header equals USER_AGENT.
#   - In test_pubdev_lister_skip_package, registers the mocked 404 response for
#     https://pub.dev/api/packages/Autolinker with
#     additional_matcher=_match_request.
#
# NOTE(review): this copy of the patch has its line breaks collapsed, and the
# last hunk header (-28,11 +28,19) declares more lines than are visible here;
# it appears truncated -- confirm against the original commit before applying.
diff --git a/swh/lister/pubdev/lister.py b/swh/lister/pubdev/lister.py --- a/swh/lister/pubdev/lister.py +++ b/swh/lister/pubdev/lister.py @@ -14,9 +14,15 @@ from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from .. import USER_AGENT +from .. import __version__ from ..pattern import CredentialsType, StatelessLister +# https://github.com/dart-lang/pub/blob/master/doc/repository-spec-v2.md#metadata-headers +USER_AGENT = ( + f"Software Heritage PubDev Lister v{__version__} " + "(+https://www.softwareheritage.org/contact)" +) + logger = logging.getLogger(__name__) # Aliasing the page results returned by `get_pages` method from the lister. diff --git a/swh/lister/pubdev/tests/test_lister.py b/swh/lister/pubdev/tests/test_lister.py --- a/swh/lister/pubdev/tests/test_lister.py +++ b/swh/lister/pubdev/tests/test_lister.py @@ -3,7 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from swh.lister.pubdev.lister import PubDevLister +from swh.lister.pubdev.lister import USER_AGENT, PubDevLister expected_origins = { "https://pub.dev/packages/Autolinker", @@ -28,11 +28,19 @@ assert origin.last_update is not None +def _match_request(request): + return request.headers.get("User-Agent") == USER_AGENT + + def test_pubdev_lister_skip_package( datadir, requests_mock_datadir, swh_scheduler, requests_mock ): - requests_mock.get("https://pub.dev/api/packages/Autolinker", status_code=404) + requests_mock.get( + "https://pub.dev/api/packages/Autolinker", + status_code=404, + additional_matcher=_match_request, + ) lister = PubDevLister(scheduler=swh_scheduler) res = lister.run()