Page MenuHomeSoftware Heritage

D2127.diff
No OneTemporary

D2127.diff

diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py
--- a/swh/lister/packagist/lister.py
+++ b/swh/lister/packagist/lister.py
@@ -1,15 +1,30 @@
-# Copyright (C) 2019 the Software Heritage developers
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import random
import json
-from .models import PackagistModel
+import logging
+import random
+
+from typing import Any, List, Mapping
from swh.scheduler import utils
from swh.lister.core.simple_lister import SimpleLister
from swh.lister.core.lister_transports import ListerOnePageApiTransport
+from .models import PackagistModel
+
+
+logger = logging.getLogger(__name__)
+
+
+def compute_package_url(repo_name: str) -> str:
+ """Compute packagist package url from repo name.
+
+ """
+ return 'https://repo.packagist.org/p/%s.json' % repo_name
+
class PackagistLister(ListerOnePageApiTransport, SimpleLister):
"""List packages available in the Packagist package manager.
@@ -44,31 +59,35 @@
ListerOnePageApiTransport .__init__(self)
SimpleLister.__init__(self, override_config=override_config)
- def task_dict(self, origin_type, origin_url, **kwargs):
+ def task_dict(self, origin_type: str, origin_url: str,
+ **kwargs: Mapping[str, str]) -> Mapping[str, str]:
"""Return task format dict
This is overridden from the lister_base as more information is
needed for the ingestion task creation.
"""
- return utils.create_task_dict('load-%s' % origin_type,
- kwargs.get('policy', 'recurring'),
- kwargs.get('name'), origin_url)
+ return utils.create_task_dict(
+ 'load-%s' % origin_type,
+ kwargs.get('policy', 'recurring'),
+ kwargs.get('name'), origin_url,
+ retries_left=3)
- def list_packages(self, response):
+ def list_packages(self, response: Any) -> List[str]:
"""List the actual packagist origins from the response.
"""
response = json.loads(response.text)
packages = [name for name in response['packageNames']]
+ logger.debug('Number of packages: %s', len(packages))
random.shuffle(packages)
return packages
- def get_model_from_repo(self, repo_name):
+ def get_model_from_repo(self, repo_name: str) -> Mapping[str, str]:
"""Transform from repository representation to model
"""
- url = 'https://repo.packagist.org/p/%s.json' % repo_name
+ url = compute_package_url(repo_name)
return {
'uid': repo_name,
'name': repo_name,
diff --git a/swh/lister/packagist/tests/conftest.py b/swh/lister/packagist/tests/conftest.py
--- a/swh/lister/packagist/tests/conftest.py
+++ b/swh/lister/packagist/tests/conftest.py
@@ -1 +1,23 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
from swh.lister.core.tests.conftest import * # noqa
+
+
+@pytest.fixture
+def lister_packagist(swh_listers):
+ lister = swh_listers['packagist']
+
+ # Amend the scheduler with the not-yet-known load-packagist task type
+ lister.scheduler.create_task_type({
+ 'type': 'load-packagist',
+ 'description': 'Load packagist origin',
+ 'backend_name': 'swh.loader.package.tasks.LoaderPackagist',
+ 'default_interval': '1 day',
+ })
+
+ return lister
diff --git a/swh/lister/packagist/tests/api_response.json b/swh/lister/packagist/tests/data/packagist.org/packages_list.json
rename from swh/lister/packagist/tests/api_response.json
rename to swh/lister/packagist/tests/data/packagist.org/packages_list.json
diff --git a/swh/lister/packagist/tests/test_lister.py b/swh/lister/packagist/tests/test_lister.py
--- a/swh/lister/packagist/tests/test_lister.py
+++ b/swh/lister/packagist/tests/test_lister.py
@@ -1,11 +1,14 @@
-# Copyright (C) 2019 the Software Heritage developers
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import unittest
import requests_mock
+
from unittest.mock import patch
-from swh.lister.packagist.lister import PackagistLister
+
+from swh.lister.packagist.lister import PackagistLister, compute_package_url
from swh.lister.core.tests.test_lister import HttpSimpleListerTester
@@ -29,7 +32,7 @@
Lister = PackagistLister
PAGE = 'https://packagist.org/packages/list.json'
lister_subdir = 'packagist'
- good_api_response_file = 'api_response.json'
+ good_api_response_file = 'data/packagist.org/packages_list.json'
entries = 5
@requests_mock.Mocker()
@@ -54,13 +57,46 @@
for key, values in model[0].items():
assert values == expected_model[key]
- def test_task_dict(self):
+ @patch('swh.lister.packagist.lister.utils.create_task_dict')
+ def test_task_dict(self, mock_create_tasks):
"""Test the task creation of lister
"""
fl = self.get_fl()
- with patch('swh.lister.packagist.lister.utils.create_task_dict') as mock_create_tasks: # noqa
- fl.task_dict(origin_type='packagist', origin_url='https://abc',
- name='test_pack')
+ fl.task_dict(origin_type='packagist', origin_url='https://abc',
+ name='test_pack')
mock_create_tasks.assert_called_once_with(
- 'load-packagist', 'recurring', 'test_pack', 'https://abc')
+ 'load-packagist', 'recurring', 'test_pack', 'https://abc',
+ retries_left=3)
+
+
+def test_compute_package_url():
+ expected_url = 'https://repo.packagist.org/p/hello.json'
+ actual_url = compute_package_url('hello')
+ assert actual_url == expected_url
+
+
+def test_packagist_lister(lister_packagist, requests_mock_datadir):
+ lister_packagist.run()
+
+ r = lister_packagist.scheduler.search_tasks(task_type='load-packagist')
+ assert len(r) == 5
+
+ for row in r:
+ assert row['type'] == 'load-packagist'
+ # arguments check
+ args = row['arguments']['args']
+ assert len(args) == 2
+
+ package = args[0]
+ url = args[1]
+
+ expected_url = compute_package_url(package)
+ assert url == expected_url
+
+ # kwargs
+ kwargs = row['arguments']['kwargs']
+ assert kwargs == {}
+
+ assert row['policy'] == 'recurring'
+ assert row['priority'] is None

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 8:07 AM (8 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216275

Event Timeline