diff --git a/swh/lister/__init__.py b/swh/lister/__init__.py --- a/swh/lister/__init__.py +++ b/swh/lister/__init__.py @@ -15,6 +15,9 @@ except pkg_resources.DistributionNotFound: __version__ = 'devel' +USER_AGENT_TEMPLATE = 'Software Heritage Lister (%s)' +USER_AGENT = USER_AGENT_TEMPLATE % __version__ + LISTERS = {entry_point.name.split('.', 1)[1]: entry_point for entry_point in pkg_resources.iter_entry_points('swh.workers') diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py --- a/swh/lister/cgit/lister.py +++ b/swh/lister/cgit/lister.py @@ -13,6 +13,7 @@ from .models import CGitModel from swh.core.utils import grouper +from swh.lister import USER_AGENT from swh.lister.core.lister_base import ListerBase @@ -72,6 +73,9 @@ self.instance = instance self.session = Session() self.session.mount(self.url, HTTPAdapter(max_retries=3)) + self.session.headers = { + 'User-Agent': USER_AGENT, + } def run(self): total = 0 diff --git a/swh/lister/cgit/tests/test_lister.py b/swh/lister/cgit/tests/test_lister.py --- a/swh/lister/cgit/tests/test_lister.py +++ b/swh/lister/cgit/tests/test_lister.py @@ -3,6 +3,9 @@ # See top-level LICENSE file for more information +from swh.lister import __version__ + + def test_lister_no_page(requests_mock_datadir, swh_listers): lister = swh_listers['cgit'] @@ -64,3 +67,16 @@ assert kwargs == {} assert row['policy'] == 'recurring' assert row['priority'] is None + + +def test_lister_requests(requests_mock_datadir, swh_listers): + lister = swh_listers['cgit'] + lister.url = 'https://git.tizen/cgit/' + lister.run() + + assert len(requests_mock_datadir.request_history) != 0 + for request in requests_mock_datadir.request_history: + assert 'User-Agent' in request.headers + user_agent = request.headers['User-Agent'] + assert 'Software Heritage Lister' in user_agent + assert __version__ in user_agent diff --git a/swh/lister/core/lister_transports.py b/swh/lister/core/lister_transports.py --- a/swh/lister/core/lister_transports.py +++ b/swh/lister/core/lister_transports.py @@ -14,7 +14,7 @@ from typing import Optional, Union -from swh.lister import __version__ +from swh.lister import USER_AGENT_TEMPLATE, __version__ from .abstractattribute import AbstractAttribute from .lister_base import FetchError @@ -45,7 +45,7 @@ MAY BE OVERRIDDEN if request headers are needed. """ return { - 'User-Agent': 'Software Heritage lister (%s)' % self.lister_version + 'User-Agent': USER_AGENT_TEMPLATE % self.lister_version } def request_instance_credentials(self): diff --git a/swh/lister/core/tests/test_lister.py b/swh/lister/core/tests/test_lister.py --- a/swh/lister/core/tests/test_lister.py +++ b/swh/lister/core/tests/test_lister.py @@ -326,6 +326,17 @@ self.get_api_response(self.first_index) self.assertEqual(sleepmock.call_count, 2) + @requests_mock.Mocker() + def test_request_headers(self, http_mocker): + fl = self.create_fl_with_db(http_mocker) + fl.run() + self.assertNotEqual(len(http_mocker.request_history), 0) + for request in http_mocker.request_history: + assert 'User-Agent' in request.headers + user_agent = request.headers['User-Agent'] + assert 'Software Heritage Lister' in user_agent + assert swh.lister.__version__ in user_agent + def scheduled_tasks_test(self, next_api_response_file, next_last_index, http_mocker): """Check that no loading tasks get disabled when processing a new diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py --- a/swh/lister/github/lister.py +++ b/swh/lister/github/lister.py @@ -54,7 +54,12 @@ return [self.get_model_from_repo(repo) for repo in repos] def request_headers(self): - return {'Accept': 'application/vnd.github.v3+json'} + """(Override) Set requests headers to send when querying the GitHub API + + """ + headers = super().request_headers() + headers['Accept'] = 'application/vnd.github.v3+json' + return headers def disable_deleted_repo_tasks(self, index, next_index, keep_these): """ (Overrides) Fix provided index value to avoid erroneously disabling diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py --- a/swh/lister/npm/lister.py +++ b/swh/lister/npm/lister.py @@ -69,8 +69,9 @@ registry. """ - return {'User-Agent': 'Software Heritage npm lister', - 'Accept': 'application/json'} + headers = super().request_headers() + headers['Accept'] = 'application/json' + return headers def _compute_urls(self, repo_name): """Return a tuple (package_url, package_metadata_url) diff --git a/swh/lister/npm/tests/test_tasks.py b/swh/lister/npm/tests/test_tasks.py --- a/swh/lister/npm/tests/test_tasks.py +++ b/swh/lister/npm/tests/test_tasks.py @@ -40,7 +40,6 @@ # setup the mocked NpmLister lister.return_value = lister lister.run.return_value = None - lister.request_headers.return_value = [] seq.return_value = 42 save.side_effect = mock_save diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py --- a/swh/lister/phabricator/lister.py +++ b/swh/lister/phabricator/lister.py @@ -57,8 +57,9 @@ (Override) Set requests headers to send when querying the Phabricator API """ - return {'User-Agent': 'Software Heritage phabricator lister', - 'Accept': 'application/json'} + headers = super().request_headers() + headers['Accept'] = 'application/json' + return headers def get_model_from_repo(self, repo): url = get_repo_url(repo['attachments']['uris']['uris'])