Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7122819
D2397.id8478.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
16 KB
Subscribers
None
D2397.id8478.diff
View Options
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,3 +1,4 @@
swh.core >= 0.0.75
swh.model >= 0.0.18
swh.storage >= 0.0.153
+swh.scheduler
diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,4 +1,6 @@
pytest
+pytest-mock
+pytest-postgresql >= 2.1.0
requests_mock
swh-core[testing]
swh-scheduler[testing]
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -57,6 +57,8 @@
loader.deposit=swh.loader.package.deposit:register
loader.npm=swh.loader.package.npm:register
loader.pypi=swh.loader.package.pypi:register
+ [swh.cli.subcommands]
+ loader=swh.loader.cli:run
''',
classifiers=[
"Programming Language :: Python :: 3",
diff --git a/swh/loader/cli.py b/swh/loader/cli.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/cli.py
@@ -0,0 +1,68 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+
+import click
+import pkg_resources
+
+from typing import Any
+
+from swh.core.cli import CONTEXT_SETTINGS
+from swh.scheduler.cli.utils import parse_options
+
+
+logger = logging.getLogger(__name__)
+
+
+LOADERS = {entry_point.name.split('.', 1)[1]: entry_point
+ for entry_point in pkg_resources.iter_entry_points('swh.workers')
+ if entry_point.name.split('.', 1)[0] == 'loader'}
+
+
+SUPPORTED_LOADERS = list(LOADERS)
+
+
+def get_loader(name: str, **kwargs) -> Any:
+ """Given a loader name, instantiate it.
+
+ Args:
+ name: Loader's name
+ kwargs: Configuration dict (url...)
+
+ Returns:
+ An instantiated loader
+
+ """
+ if name not in LOADERS:
+ raise ValueError(
+ 'Invalid loader %s: only supported loaders are %s' %
+ (name, SUPPORTED_LOADERS))
+
+ registry_entry = LOADERS[name].load()()
+ logger.debug(f'registry: {registry_entry}')
+ loader_cls = registry_entry['loader']
+ logger.debug(f'loader class: {loader_cls}')
+ return loader_cls(**kwargs)
+
+
+@click.command(name='run', context_settings=CONTEXT_SETTINGS)
+@click.option('--type', '-t', help='Loader to run',
+ type=click.Choice(SUPPORTED_LOADERS))
+@click.option('--url', '-u', default=None,
+ help="Origin url to load")
+@click.argument('options', nargs=-1)
+@click.pass_context
+def run(ctx, type, url, options):
+ """Loader cli tools
+
+ Load an origin from its url with loader <name>
+
+ """
+ (_, kw) = parse_options(options)
+ logger.debug(f'kw: {kw}')
+ loader = get_loader(type, url=url, **kw)
+ result = loader.load()
+ click.echo(result)
diff --git a/swh/loader/package/archive/__init__.py b/swh/loader/package/archive/__init__.py
--- a/swh/loader/package/archive/__init__.py
+++ b/swh/loader/package/archive/__init__.py
@@ -9,6 +9,8 @@
def register() -> Mapping[str, Any]:
"""Register the current worker module's definition"""
+ from .loader import ArchiveLoader
return {
'task_modules': [f'{__name__}.tasks'],
+ 'loader': ArchiveLoader,
}
diff --git a/swh/loader/package/archive/tests/test_tasks.py b/swh/loader/package/archive/tests/test_tasks.py
--- a/swh/loader/package/archive/tests/test_tasks.py
+++ b/swh/loader/package/archive/tests/test_tasks.py
@@ -3,12 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from unittest.mock import patch
-
-@patch('swh.loader.package.archive.loader.ArchiveLoader.load')
-def test_archive_loader(
- mock_loader, swh_app, celery_session_worker, swh_config):
+def test_archive_loader(mocker, swh_app, celery_session_worker, swh_config):
+ mock_loader = mocker.patch(
+ 'swh.loader.package.archive.loader.ArchiveLoader.load')
mock_loader.return_value = {'status': 'eventful'}
res = swh_app.send_task(
diff --git a/swh/loader/package/debian/__init__.py b/swh/loader/package/debian/__init__.py
--- a/swh/loader/package/debian/__init__.py
+++ b/swh/loader/package/debian/__init__.py
@@ -9,6 +9,8 @@
def register() -> Mapping[str, Any]:
"""Register the current worker module's definition"""
+ from .loader import DebianLoader
return {
'task_modules': [f'{__name__}.tasks'],
+ 'loader': DebianLoader,
}
diff --git a/swh/loader/package/debian/tests/test_tasks.py b/swh/loader/package/debian/tests/test_tasks.py
--- a/swh/loader/package/debian/tests/test_tasks.py
+++ b/swh/loader/package/debian/tests/test_tasks.py
@@ -3,12 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from unittest.mock import patch
-
-@patch('swh.loader.package.debian.loader.DebianLoader.load')
-def test_debian_loader(
- mock_loader, swh_app, celery_session_worker, swh_config):
+def test_debian_loader(mocker, swh_app, celery_session_worker, swh_config):
+ mock_loader = mocker.patch(
+ 'swh.loader.package.debian.loader.DebianLoader.load')
mock_loader.return_value = {'status': 'eventful'}
res = swh_app.send_task(
diff --git a/swh/loader/package/deposit/__init__.py b/swh/loader/package/deposit/__init__.py
--- a/swh/loader/package/deposit/__init__.py
+++ b/swh/loader/package/deposit/__init__.py
@@ -9,6 +9,8 @@
def register() -> Mapping[str, Any]:
"""Register the current worker module's definition"""
+ from .loader import DepositLoader
return {
'task_modules': [f'{__name__}.tasks'],
+ 'loader': DepositLoader,
}
diff --git a/swh/loader/package/deposit/tests/test_tasks.py b/swh/loader/package/deposit/tests/test_tasks.py
--- a/swh/loader/package/deposit/tests/test_tasks.py
+++ b/swh/loader/package/deposit/tests/test_tasks.py
@@ -3,12 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from unittest.mock import patch
-
-@patch('swh.loader.package.deposit.loader.DepositLoader.load')
-def test_deposit_loader(
- mock_loader, swh_app, celery_session_worker, swh_config):
+def test_deposit_loader(mocker, swh_app, celery_session_worker, swh_config):
+ mock_loader = mocker.patch(
+ 'swh.loader.package.deposit.loader.DepositLoader.load')
mock_loader.return_value = {'status': 'eventful'}
res = swh_app.send_task(
diff --git a/swh/loader/package/npm/__init__.py b/swh/loader/package/npm/__init__.py
--- a/swh/loader/package/npm/__init__.py
+++ b/swh/loader/package/npm/__init__.py
@@ -9,6 +9,8 @@
def register() -> Mapping[str, Any]:
"""Register the current worker module's definition"""
+ from .loader import NpmLoader
return {
'task_modules': [f'{__name__}.tasks'],
+ 'loader': NpmLoader,
}
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -14,6 +14,7 @@
import chardet
import iso8601
+from urllib.parse import quote
from swh.model.identifiers import normalize_timestamp
from swh.loader.package.loader import PackageLoader
from swh.loader.package.utils import api_info, release_name
@@ -31,21 +32,21 @@
class NpmLoader(PackageLoader):
visit_type = 'npm'
- def __init__(self, package_name, package_url, package_metadata_url):
- super().__init__(url=package_url)
- self.provider_url = package_metadata_url
+ def __init__(self, url: str):
+ """Constructor
- self._info = None
+ Args:
+ url: origin url (e.g. https://www.npmjs.com/package/<package-name>)
+ """
+ super().__init__(url=url)
+ package_name = url.split('https://www.npmjs.com/package/')[1]
+ safe_name = quote(package_name, safe='')
+ self.provider_url = f'https://replicate.npmjs.com/{safe_name}/'
+ self._info: Dict[str, Any] = {}
self._versions = None
- # if package_url is None:
- # package_url = 'https://www.npmjs.com/package/%s' % package_name
- # if package_metadata_url is None:
- # package_metadata_url = 'https://replicate.npmjs.com/%s/' %\
- # quote(package_name, safe='')
-
@property
- def info(self) -> Dict:
+ def info(self) -> Dict[str, Any]:
"""Return the project metadata information (fetched from npm registry)
"""
diff --git a/swh/loader/package/npm/tasks.py b/swh/loader/package/npm/tasks.py
--- a/swh/loader/package/npm/tasks.py
+++ b/swh/loader/package/npm/tasks.py
@@ -9,6 +9,6 @@
@shared_task(name=__name__ + '.LoadNpm')
-def load_npm(*, package_name, package_url, package_metadata_url):
+def load_npm(*, url: str):
"""Load Npm package"""
- return NpmLoader(package_name, package_url, package_metadata_url).load()
+ return NpmLoader(url).load()
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -382,9 +382,7 @@
def test_revision_metadata_structure(swh_config, requests_mock_datadir):
package = 'org'
- loader = NpmLoader(package,
- package_url(package),
- package_metadata_url(package))
+ loader = NpmLoader(package_url(package))
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
@@ -416,9 +414,7 @@
def test_npm_loader_first_visit(swh_config, requests_mock_datadir):
package = 'org'
- loader = NpmLoader(package,
- package_url(package),
- package_metadata_url(package))
+ loader = NpmLoader(package_url(package))
actual_load_status = loader.load()
expected_snapshot_id = 'd0587e1195aed5a8800411a008f2f2d627f18e2d'
@@ -479,8 +475,7 @@
swh_config, requests_mock_datadir_visits):
package = 'org'
url = package_url(package)
- metadata_url = package_metadata_url(package)
- loader = NpmLoader(package, url, metadata_url)
+ loader = NpmLoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
@@ -539,7 +534,7 @@
def test_npm_loader_version_divergence(swh_config):
package = '@aller_shared'
url = package_url(package)
- loader = NpmLoader(package, url, package_metadata_url(package))
+ loader = NpmLoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
diff --git a/swh/loader/package/npm/tests/test_tasks.py b/swh/loader/package/npm/tests/test_tasks.py
--- a/swh/loader/package/npm/tests/test_tasks.py
+++ b/swh/loader/package/npm/tests/test_tasks.py
@@ -3,19 +3,14 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from unittest.mock import patch
-
-@patch('swh.loader.package.npm.loader.NpmLoader.load')
-def test_npm_loader(
- mock_loader, swh_app, celery_session_worker, swh_config):
+def test_npm_loader(mocker, swh_app, celery_session_worker, swh_config):
+ mock_loader = mocker.patch('swh.loader.package.npm.loader.NpmLoader.load')
mock_loader.return_value = {'status': 'eventful'}
res = swh_app.send_task(
'swh.loader.package.npm.tasks.LoadNpm',
- (), dict(package_name='some-package',
- package_url='some',
- package_metadata_url='something'))
+ (), dict(url='https://www.npmjs.com/package/some-package'))
assert res
res.wait()
assert res.successful()
diff --git a/swh/loader/package/pypi/__init__.py b/swh/loader/package/pypi/__init__.py
--- a/swh/loader/package/pypi/__init__.py
+++ b/swh/loader/package/pypi/__init__.py
@@ -9,6 +9,8 @@
def register() -> Mapping[str, Any]:
"""Register the current worker module's definition"""
+ from .loader import PyPILoader
return {
'task_modules': [f'{__name__}.tasks'],
+ 'loader': PyPILoader,
}
diff --git a/swh/loader/package/pypi/tests/test_tasks.py b/swh/loader/package/pypi/tests/test_tasks.py
--- a/swh/loader/package/pypi/tests/test_tasks.py
+++ b/swh/loader/package/pypi/tests/test_tasks.py
@@ -3,12 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from unittest.mock import patch
-
-@patch('swh.loader.package.pypi.loader.PyPILoader.load')
-def test_pypi_loader(
- mock_loader, swh_app, celery_session_worker, swh_config):
+def test_pypi_loader(mocker, swh_app, celery_session_worker, swh_config):
+ mock_loader = mocker.patch(
+ 'swh.loader.package.pypi.loader.PyPILoader.load')
mock_loader.return_value = {'status': 'eventful'}
res = swh_app.send_task(
diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py
new file mode 100644
diff --git a/swh/loader/tests/conftest.py b/swh/loader/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/tests/conftest.py
@@ -0,0 +1,24 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+from typing import Any, Dict
+
+
+@pytest.fixture
+def swh_loader_config() -> Dict[str, Any]:
+ return {
+ 'storage': {
+ 'cls': 'memory',
+ },
+ 'deposit': {
+ 'url': 'https://deposit.softwareheritage.org/1/private',
+ 'auth': {
+ 'username': 'user',
+ 'password': 'pass',
+ }
+ },
+ }
diff --git a/swh/loader/tests/test_cli.py b/swh/loader/tests/test_cli.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/tests/test_cli.py
@@ -0,0 +1,91 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+
+from swh.loader.cli import run, get_loader, SUPPORTED_LOADERS
+from swh.loader.package.loader import PackageLoader
+
+from click.testing import CliRunner
+from unittest.mock import patch
+
+
+def test_get_loader_wrong_input(swh_config):
+ """Unsupported loader should raise
+
+ """
+ loader_type = 'unknown'
+ assert loader_type not in SUPPORTED_LOADERS
+ with pytest.raises(ValueError, match='Invalid loader'):
+ get_loader(loader_type, url='db-url')
+
+
+def test_get_loader(swh_config):
+ """Instantiating a supported loader should be ok
+
+ """
+ loader_input = {
+ 'archive': {
+ 'url': 'some-url',
+ 'artifacts': [],
+ },
+ 'debian': {
+ 'url': 'some-url',
+ 'date': 'something',
+ 'packages': [],
+ },
+ 'deposit': {
+ 'url': 'some-url',
+ 'deposit_id': 1,
+ },
+ 'npm': {
+ 'url': 'https://www.npmjs.com/package/onepackage',
+ },
+ 'pypi': {
+ 'url': 'some-url',
+ },
+ }
+ for loader_type, kwargs in loader_input.items():
+ loader = get_loader(loader_type, **kwargs)
+ assert isinstance(loader, PackageLoader)
+
+
+help_msg = """Usage: run [OPTIONS] [OPTIONS]...
+
+ Loader cli tools
+
+ Load an origin from its url with loader <name>
+
+Options:
+ -t, --type [archive|debian|deposit|npm|pypi]
+ Loader to run
+ -u, --url TEXT Origin url to load
+ -h, --help Show this message and exit.
+"""
+
+
+def test_run_help(swh_config):
+ """Help message should be ok
+
+ """
+ runner = CliRunner()
+ result = runner.invoke(run, ['-h'])
+ assert result.exit_code == 0
+ assert result.output.startswith(help_msg)
+
+
+@patch('swh.loader.package.pypi.loader.PyPILoader')
+def test_run_pypi(mock_loader, swh_config):
+ """Triggering a load should be ok
+
+ """
+ runner = CliRunner()
+ result = runner.invoke(run, [
+ '--type', 'pypi',
+ '--url', 'https://some-url'
+ ])
+ assert result.exit_code == 0
+ mock_loader.assert_called_once_with(url='https://some-url') # constructor
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Dec 17, 3:59 AM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218415
Attached To
D2397: loader.cli: Reference new cli
Event Timeline
Log In to Comment