Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9348450
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
6 KB
Subscribers
None
View Options
diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py
index 4da6e55..99d8013 100644
--- a/swh/lister/pypi/lister.py
+++ b/swh/lister/pypi/lister.py
@@ -1,70 +1,71 @@
-# Copyright (C) 2018-2019 the Software Heritage developers
+# Copyright (C) 2018-2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import random
import xmltodict
from .models import PyPIModel
from swh.scheduler import utils
from swh.lister.core.simple_lister import SimpleLister
from swh.lister.core.lister_transports import ListerOnePageApiTransport
class PyPILister(ListerOnePageApiTransport, SimpleLister):
    """Lister turning the PyPI simple index page into pypi origins.

    Package names are read from the anchors of the page response (see
    ``list_packages``) and each name is expanded into a model dict (see
    ``get_model_from_repo``).
    """
    MODEL = PyPIModel
    LISTER_NAME = 'pypi'
    PAGE = 'https://pypi.org/simple/'
    instance = 'pypi'  # As of today only the main pypi.org is used

    def __init__(self, override_config=None):
        # Both bases are initialised explicitly; only SimpleLister takes
        # the configuration override.
        ListerOnePageApiTransport.__init__(self)
        SimpleLister.__init__(self, override_config=override_config)

    def task_dict(self, origin_type, origin_url, **kwargs):
        """(Override) Return task format dict

        This is overridden from the lister_base as more information is
        needed for the ingestion task creation.

        Args:
            origin_type: suffix of the task type ('load-<origin_type>').
            origin_url: url of the origin, passed as task argument.
            **kwargs: optional 'policy' (defaults to 'recurring'), 'name'
                (project name) and 'html_url' (forwarded as
                ``project_metadata_url``).

        Returns:
            whatever ``utils.create_task_dict`` returns for those values.

        """
        _type = 'load-%s' % origin_type
        _policy = kwargs.get('policy', 'recurring')
        project_name = kwargs.get('name')
        project_metadata_url = kwargs.get('html_url')
        return utils.create_task_dict(
            _type, _policy, project_name, origin_url,
            project_metadata_url=project_metadata_url)

    def list_packages(self, response):
        """(Override) List the actual pypi origins from the response.

        Args:
            response: object whose ``content`` attribute holds the index
                page markup.

        Returns:
            list of the anchors' text (the package names) in random
            order; empty when the page body holds no anchor.

        """
        # xmltodict collapses a lone <a> element into a single mapping
        # instead of a one-element list; force_list keeps the shape stable
        # so a page with exactly one package is handled like any other.
        result = xmltodict.parse(response.content, force_list=('a',))
        # An empty <body/> is parsed as None and a body without anchors
        # has no 'a' key: both mean "no package".
        body = result['html'].get('body') or {}
        _packages = [p['#text'] for p in body.get('a', [])]
        random.shuffle(_packages)
        return _packages

    def _compute_urls(self, repo_name):
        """Returns a tuple (project_url, project_metadata_url)

        """
        return (
            'https://pypi.org/project/%s/' % repo_name,
            'https://pypi.org/pypi/%s/json' % repo_name
        )

    def get_model_from_repo(self, repo_name):
        """(Override) Transform from repository representation to model

        Args:
            repo_name: name of the package, used as uid, name and
                full_name.

        Returns:
            dict where 'origin_url' is the project page url and
            'html_url' is the project's json metadata url.

        """
        project_url, project_url_meta = self._compute_urls(repo_name)
        return {
            'uid': repo_name,
            'name': repo_name,
            'full_name': repo_name,
            'html_url': project_url_meta,
            'origin_url': project_url,
            'origin_type': 'pypi',
        }
diff --git a/swh/lister/pypi/tests/api_response.html b/swh/lister/pypi/tests/data/pypi.org/simple
similarity index 100%
rename from swh/lister/pypi/tests/api_response.html
rename to swh/lister/pypi/tests/data/pypi.org/simple
diff --git a/swh/lister/pypi/tests/test_lister.py b/swh/lister/pypi/tests/test_lister.py
index bda21a2..20566db 100644
--- a/swh/lister/pypi/tests/test_lister.py
+++ b/swh/lister/pypi/tests/test_lister.py
@@ -1,64 +1,31 @@
-# Copyright (C) 2019 the Software Heritage developers
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import requests_mock
-import unittest
-from unittest.mock import patch
-from swh.lister.pypi.lister import PyPILister
-from swh.lister.core.tests.test_lister import HttpSimpleListerTester
-lister = PyPILister()
+def test_pypi_lister_(swh_listers, requests_mock_datadir):
+ lister = swh_listers['pypi']
-expected_packages = ['0lever-so', '0lever-utils', '0-orchestrator', '0wned']
+ lister.run()
-expected_model = {
- 'uid': 'arrow',
- 'name': 'arrow',
- 'full_name': 'arrow',
- 'html_url': 'https://pypi.org/pypi/arrow/json',
- 'origin_url': 'https://pypi.org/project/arrow/',
- 'origin_type': 'pypi',
- }
+ r = lister.scheduler.search_tasks(task_type='load-pypi')
+ assert len(r) == 4
+ for row in r:
+ assert row['type'] == 'load-pypi'
+ # arguments check
+ args = row['arguments']['args']
+ assert len(args) == 2
-class PyPIListerTester(HttpSimpleListerTester, unittest.TestCase):
- Lister = PyPILister
- PAGE = 'https://pypi.org/simple/'
- lister_subdir = 'pypi'
- good_api_response_file = 'api_response.html'
- entries = 4
+ project = args[0]
+ url = args[1]
+ assert url == 'https://pypi.org/project/%s/' % project
- @requests_mock.Mocker()
- def test_list_packages(self, http_mocker):
- """List packages from simple api page should retrieve all packages within
+ # kwargs
+ kwargs = row['arguments']['kwargs']
+ meta_url = kwargs['project_metadata_url']
+ assert meta_url == 'https://pypi.org/pypi/%s/json' % project
- """
- http_mocker.get(self.PAGE, text=self.mock_response)
- fl = self.get_fl()
- packages = fl.list_packages(self.get_api_response(0))
-
- for package in expected_packages:
- assert package in packages
-
- def test_transport_response_simplified(self):
- """Test model created by the lister
-
- """
- fl = self.get_fl()
- model = fl.transport_response_simplified(['arrow'])
- assert len(model) == 1
- for key, values in model[0].items():
- assert values == expected_model[key]
-
- def test_task_dict(self):
- """Test the task creation of lister
-
- """
- with patch('swh.lister.pypi.lister.utils.create_task_dict') as mock_create_tasks: # noqa
- lister.task_dict(origin_type='pypi', origin_url='https://abc',
- name='test_pack', html_url='https://def')
-
- mock_create_tasks.assert_called_once_with(
- 'load-pypi', 'recurring', 'test_pack', 'https://abc',
- project_metadata_url='https://def')
+ assert row['policy'] == 'recurring'
+ assert row['priority'] is None
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jul 4 2025, 6:31 PM (5 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3261423
Attached To
rDLS Listers
Event Timeline
Log In to Comment