Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py
index 4da6e55..99d8013 100644
--- a/swh/lister/pypi/lister.py
+++ b/swh/lister/pypi/lister.py
@@ -1,70 +1,71 @@
-# Copyright (C) 2018-2019 the Software Heritage developers
+# Copyright (C) 2018-2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import random
import xmltodict
from .models import PyPIModel
from swh.scheduler import utils
from swh.lister.core.simple_lister import SimpleLister
from swh.lister.core.lister_transports import ListerOnePageApiTransport
class PyPILister(ListerOnePageApiTransport, SimpleLister):
    """Lister for PyPI projects.

    Configured with the ``https://pypi.org/simple/`` index page; each
    project name found there is turned into a model dict and into the
    arguments of a ``load-pypi`` task.
    """
    MODEL = PyPIModel
    LISTER_NAME = 'pypi'
    PAGE = 'https://pypi.org/simple/'
    instance = 'pypi'  # As of today only the main pypi.org is used

    def __init__(self, override_config=None):
        """Initialize both base classes explicitly.

        Args:
            override_config: forwarded as-is to ``SimpleLister.__init__``.
        """
        # The two bases take different constructor arguments, hence the
        # explicit calls.
        ListerOnePageApiTransport.__init__(self)
        SimpleLister.__init__(self, override_config=override_config)

    def task_dict(self, origin_type, origin_url, **kwargs):
        """(Override) Return task format dict

        This is overridden from the lister_base as more information is
        needed for the ingestion task creation.

        Args:
            origin_type: suffix of the task type (``'load-<origin_type>'``).
            origin_url: project url, passed as second task argument.
            **kwargs: ``policy`` (defaults to ``'recurring'``), ``name``
                (project name, first task argument) and ``html_url``
                (forwarded as ``project_metadata_url``).

        Returns:
            whatever ``utils.create_task_dict`` returns for those values.
        """
        _type = 'load-%s' % origin_type
        _policy = kwargs.get('policy', 'recurring')
        project_name = kwargs.get('name')
        project_metadata_url = kwargs.get('html_url')
        return utils.create_task_dict(
            _type, _policy, project_name, origin_url,
            project_metadata_url=project_metadata_url)

    def list_packages(self, response):
        """(Override) List the actual pypi origins from the response.

        Args:
            response: object whose ``content`` attribute holds the markup
                of the index page.

        Returns:
            list of the anchors' text, in random order; empty when the
            page body contains no anchor.
        """
        result = xmltodict.parse(response.content)
        # An empty <body> element is parsed as None by xmltodict.
        body = result['html']['body'] or {}
        anchors = body.get('a') or []
        # xmltodict only builds a list for repeated sibling elements; a
        # page with one single <a> yields the element itself.
        if not isinstance(anchors, list):
            anchors = [anchors]
        _packages = [p['#text'] for p in anchors]
        random.shuffle(_packages)
        return _packages

    def _compute_urls(self, repo_name):
        """Returns a tuple (project_url, project_metadata_url)

        Args:
            repo_name: project name interpolated in both urls.
        """
        return (
            'https://pypi.org/project/%s/' % repo_name,
            'https://pypi.org/pypi/%s/json' % repo_name
        )

    def get_model_from_repo(self, repo_name):
        """(Override) Transform from repository representation to model

        Args:
            repo_name: project name, used for the uid, name and full_name
                fields and to derive both urls.

        Returns:
            dict with keys uid, name, full_name, html_url, origin_url and
            origin_type.
        """
        project_url, project_url_meta = self._compute_urls(repo_name)
        return {
            'uid': repo_name,
            'name': repo_name,
            'full_name': repo_name,
            # html_url carries the JSON metadata url: task_dict reads it
            # back and forwards it as project_metadata_url.
            'html_url': project_url_meta,
            'origin_url': project_url,
            'origin_type': 'pypi',
        }
diff --git a/swh/lister/pypi/tests/api_response.html b/swh/lister/pypi/tests/data/pypi.org/simple
similarity index 100%
rename from swh/lister/pypi/tests/api_response.html
rename to swh/lister/pypi/tests/data/pypi.org/simple
diff --git a/swh/lister/pypi/tests/test_lister.py b/swh/lister/pypi/tests/test_lister.py
index bda21a2..20566db 100644
--- a/swh/lister/pypi/tests/test_lister.py
+++ b/swh/lister/pypi/tests/test_lister.py
@@ -1,64 +1,31 @@
-# Copyright (C) 2019 the Software Heritage developers
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import requests_mock
-import unittest
-from unittest.mock import patch
-from swh.lister.pypi.lister import PyPILister
-from swh.lister.core.tests.test_lister import HttpSimpleListerTester
-lister = PyPILister()
+def test_pypi_lister_(swh_listers, requests_mock_datadir):
+ lister = swh_listers['pypi']
-expected_packages = ['0lever-so', '0lever-utils', '0-orchestrator', '0wned']
+ lister.run()
-expected_model = {
- 'uid': 'arrow',
- 'name': 'arrow',
- 'full_name': 'arrow',
- 'html_url': 'https://pypi.org/pypi/arrow/json',
- 'origin_url': 'https://pypi.org/project/arrow/',
- 'origin_type': 'pypi',
- }
+ r = lister.scheduler.search_tasks(task_type='load-pypi')
+ assert len(r) == 4
+ for row in r:
+ assert row['type'] == 'load-pypi'
+ # arguments check
+ args = row['arguments']['args']
+ assert len(args) == 2
-class PyPIListerTester(HttpSimpleListerTester, unittest.TestCase):
- Lister = PyPILister
- PAGE = 'https://pypi.org/simple/'
- lister_subdir = 'pypi'
- good_api_response_file = 'api_response.html'
- entries = 4
+ project = args[0]
+ url = args[1]
+ assert url == 'https://pypi.org/project/%s/' % project
- @requests_mock.Mocker()
- def test_list_packages(self, http_mocker):
- """List packages from simple api page should retrieve all packages within
+ # kwargs
+ kwargs = row['arguments']['kwargs']
+ meta_url = kwargs['project_metadata_url']
+ assert meta_url == 'https://pypi.org/pypi/%s/json' % project
- """
- http_mocker.get(self.PAGE, text=self.mock_response)
- fl = self.get_fl()
- packages = fl.list_packages(self.get_api_response(0))
-
- for package in expected_packages:
- assert package in packages
-
- def test_transport_response_simplified(self):
- """Test model created by the lister
-
- """
- fl = self.get_fl()
- model = fl.transport_response_simplified(['arrow'])
- assert len(model) == 1
- for key, values in model[0].items():
- assert values == expected_model[key]
-
- def test_task_dict(self):
- """Test the task creation of lister
-
- """
- with patch('swh.lister.pypi.lister.utils.create_task_dict') as mock_create_tasks: # noqa
- lister.task_dict(origin_type='pypi', origin_url='https://abc',
- name='test_pack', html_url='https://def')
-
- mock_create_tasks.assert_called_once_with(
- 'load-pypi', 'recurring', 'test_pack', 'https://abc',
- project_metadata_url='https://def')
+ assert row['policy'] == 'recurring'
+ assert row['priority'] is None

File Metadata

Mime Type
text/x-diff
Expires
Jul 4 2025, 6:31 PM (5 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3261423

Event Timeline