Page MenuHomeSoftware Heritage

D2397.id8478.diff
No OneTemporary

D2397.id8478.diff

diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,3 +1,4 @@
swh.core >= 0.0.75
swh.model >= 0.0.18
swh.storage >= 0.0.153
+swh.scheduler
diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,4 +1,6 @@
pytest
+pytest-mock
+pytest-postgresql >= 2.1.0
requests_mock
swh-core[testing]
swh-scheduler[testing]
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -57,6 +57,8 @@
loader.deposit=swh.loader.package.deposit:register
loader.npm=swh.loader.package.npm:register
loader.pypi=swh.loader.package.pypi:register
+ [swh.cli.subcommands]
+ loader=swh.loader.cli:run
''',
classifiers=[
"Programming Language :: Python :: 3",
diff --git a/swh/loader/cli.py b/swh/loader/cli.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/cli.py
@@ -0,0 +1,68 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+
+import click
+import pkg_resources
+
+from typing import Any
+
+from swh.core.cli import CONTEXT_SETTINGS
+from swh.scheduler.cli.utils import parse_options
+
+
+logger = logging.getLogger(__name__)
+
+
+def _discover_loaders():
+    """Collect the loader entry points registered under 'swh.workers'.
+
+    Only entry points named 'loader.<name>' are kept, keyed by '<name>'.
+    An entry point named exactly 'loader' (no dot-separated suffix) is
+    ignored instead of raising an IndexError when this module is imported.
+
+    Returns:
+        Mapping from loader name to its (not yet loaded) entry point
+
+    """
+    loaders = {}
+    for entry_point in pkg_resources.iter_entry_points('swh.workers'):
+        prefix, _, loader_name = entry_point.name.partition('.')
+        if prefix == 'loader' and loader_name:
+            loaders[loader_name] = entry_point
+    return loaders
+
+
+LOADERS = _discover_loaders()
+
+
+# Loader names accepted by the cli, in entry point discovery order
+SUPPORTED_LOADERS = list(LOADERS)
+
+
+def get_loader(name: str, **kwargs: Any) -> Any:
+    """Given a loader name, instantiate it.
+
+    Args:
+        name: Loader's name (one of SUPPORTED_LOADERS)
+        kwargs: Keyword arguments forwarded as is to the loader's
+            constructor (url...)
+
+    Returns:
+        An instantiated loader
+
+    Raises:
+        ValueError: if no loader is registered under that name
+
+    """
+    if name not in LOADERS:
+        raise ValueError(
+            'Invalid loader %s: only supported loaders are %s' %
+            (name, SUPPORTED_LOADERS))
+
+    # Loading the entry point yields a callable; calling it returns the
+    # registry dict whose 'loader' key holds the loader class.
+    registry_entry = LOADERS[name].load()()
+    # Lazy %-style arguments: only formatted when debug logging is enabled
+    logger.debug('registry: %s', registry_entry)
+    loader_cls = registry_entry['loader']
+    logger.debug('loader class: %s', loader_cls)
+    return loader_cls(**kwargs)
+
+
+@click.command(name='run', context_settings=CONTEXT_SETTINGS)
+@click.option('--type', '-t', help='Loader to run',
+              type=click.Choice(SUPPORTED_LOADERS))
+@click.option('--url', '-u', default=None,
+              help="Origin url to load")
+@click.argument('options', nargs=-1)
+@click.pass_context
+def run(ctx, type, url, options):
+    """Loader cli tools
+
+    Load an origin from its url with loader <name>
+
+    """
+    # NOTE: the docstring above is rendered by click as the command's help
+    # text, so implementation notes are kept in comments.
+    if type is None:
+        # --type has no default: report a usage error rather than letting
+        # get_loader raise a bare ValueError about loader None
+        raise click.UsageError('Missing option "--type" / "-t"')
+    # parse_options returns a pair; only its second item is used, as extra
+    # keyword arguments for the loader's constructor
+    (_, kw) = parse_options(options)
+    logger.debug('kw: %s', kw)
+    loader = get_loader(type, url=url, **kw)
+    result = loader.load()
+    click.echo(result)
diff --git a/swh/loader/package/archive/__init__.py b/swh/loader/package/archive/__init__.py
--- a/swh/loader/package/archive/__init__.py
+++ b/swh/loader/package/archive/__init__.py
@@ -9,6 +9,8 @@
def register() -> Mapping[str, Any]:
"""Register the current worker module's definition"""
+ from .loader import ArchiveLoader
return {
'task_modules': [f'{__name__}.tasks'],
+ 'loader': ArchiveLoader,
}
diff --git a/swh/loader/package/archive/tests/test_tasks.py b/swh/loader/package/archive/tests/test_tasks.py
--- a/swh/loader/package/archive/tests/test_tasks.py
+++ b/swh/loader/package/archive/tests/test_tasks.py
@@ -3,12 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from unittest.mock import patch
-
-@patch('swh.loader.package.archive.loader.ArchiveLoader.load')
-def test_archive_loader(
- mock_loader, swh_app, celery_session_worker, swh_config):
+def test_archive_loader(mocker, swh_app, celery_session_worker, swh_config):
+ mock_loader = mocker.patch(
+ 'swh.loader.package.archive.loader.ArchiveLoader.load')
mock_loader.return_value = {'status': 'eventful'}
res = swh_app.send_task(
diff --git a/swh/loader/package/debian/__init__.py b/swh/loader/package/debian/__init__.py
--- a/swh/loader/package/debian/__init__.py
+++ b/swh/loader/package/debian/__init__.py
@@ -9,6 +9,8 @@
def register() -> Mapping[str, Any]:
"""Register the current worker module's definition"""
+ from .loader import DebianLoader
return {
'task_modules': [f'{__name__}.tasks'],
+ 'loader': DebianLoader,
}
diff --git a/swh/loader/package/debian/tests/test_tasks.py b/swh/loader/package/debian/tests/test_tasks.py
--- a/swh/loader/package/debian/tests/test_tasks.py
+++ b/swh/loader/package/debian/tests/test_tasks.py
@@ -3,12 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from unittest.mock import patch
-
-@patch('swh.loader.package.debian.loader.DebianLoader.load')
-def test_debian_loader(
- mock_loader, swh_app, celery_session_worker, swh_config):
+def test_debian_loader(mocker, swh_app, celery_session_worker, swh_config):
+ mock_loader = mocker.patch(
+ 'swh.loader.package.debian.loader.DebianLoader.load')
mock_loader.return_value = {'status': 'eventful'}
res = swh_app.send_task(
diff --git a/swh/loader/package/deposit/__init__.py b/swh/loader/package/deposit/__init__.py
--- a/swh/loader/package/deposit/__init__.py
+++ b/swh/loader/package/deposit/__init__.py
@@ -9,6 +9,8 @@
def register() -> Mapping[str, Any]:
"""Register the current worker module's definition"""
+ from .loader import DepositLoader
return {
'task_modules': [f'{__name__}.tasks'],
+ 'loader': DepositLoader,
}
diff --git a/swh/loader/package/deposit/tests/test_tasks.py b/swh/loader/package/deposit/tests/test_tasks.py
--- a/swh/loader/package/deposit/tests/test_tasks.py
+++ b/swh/loader/package/deposit/tests/test_tasks.py
@@ -3,12 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from unittest.mock import patch
-
-@patch('swh.loader.package.deposit.loader.DepositLoader.load')
-def test_deposit_loader(
- mock_loader, swh_app, celery_session_worker, swh_config):
+def test_deposit_loader(mocker, swh_app, celery_session_worker, swh_config):
+ mock_loader = mocker.patch(
+ 'swh.loader.package.deposit.loader.DepositLoader.load')
mock_loader.return_value = {'status': 'eventful'}
res = swh_app.send_task(
diff --git a/swh/loader/package/npm/__init__.py b/swh/loader/package/npm/__init__.py
--- a/swh/loader/package/npm/__init__.py
+++ b/swh/loader/package/npm/__init__.py
@@ -9,6 +9,8 @@
def register() -> Mapping[str, Any]:
"""Register the current worker module's definition"""
+ from .loader import NpmLoader
return {
'task_modules': [f'{__name__}.tasks'],
+ 'loader': NpmLoader,
}
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -14,6 +14,7 @@
import chardet
import iso8601
+from urllib.parse import quote
from swh.model.identifiers import normalize_timestamp
from swh.loader.package.loader import PackageLoader
from swh.loader.package.utils import api_info, release_name
@@ -31,21 +32,21 @@
class NpmLoader(PackageLoader):
visit_type = 'npm'
- def __init__(self, package_name, package_url, package_metadata_url):
- super().__init__(url=package_url)
- self.provider_url = package_metadata_url
+ def __init__(self, url: str):
+ """Constructor
- self._info = None
+ Args:
+ url: origin url (e.g. https://www.npmjs.com/package/<package-name>)
+ """
+ super().__init__(url=url)
+ package_name = url.split('https://www.npmjs.com/package/')[1]
+ safe_name = quote(package_name, safe='')
+ self.provider_url = f'https://replicate.npmjs.com/{safe_name}/'
+ self._info: Dict[str, Any] = {}
self._versions = None
- # if package_url is None:
- # package_url = 'https://www.npmjs.com/package/%s' % package_name
- # if package_metadata_url is None:
- # package_metadata_url = 'https://replicate.npmjs.com/%s/' %\
- # quote(package_name, safe='')
-
@property
- def info(self) -> Dict:
+ def info(self) -> Dict[str, Any]:
"""Return the project metadata information (fetched from npm registry)
"""
diff --git a/swh/loader/package/npm/tasks.py b/swh/loader/package/npm/tasks.py
--- a/swh/loader/package/npm/tasks.py
+++ b/swh/loader/package/npm/tasks.py
@@ -9,6 +9,6 @@
@shared_task(name=__name__ + '.LoadNpm')
-def load_npm(*, package_name, package_url, package_metadata_url):
+def load_npm(*, url: str):
"""Load Npm package"""
- return NpmLoader(package_name, package_url, package_metadata_url).load()
+ return NpmLoader(url).load()
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -382,9 +382,7 @@
def test_revision_metadata_structure(swh_config, requests_mock_datadir):
package = 'org'
- loader = NpmLoader(package,
- package_url(package),
- package_metadata_url(package))
+ loader = NpmLoader(package_url(package))
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
@@ -416,9 +414,7 @@
def test_npm_loader_first_visit(swh_config, requests_mock_datadir):
package = 'org'
- loader = NpmLoader(package,
- package_url(package),
- package_metadata_url(package))
+ loader = NpmLoader(package_url(package))
actual_load_status = loader.load()
expected_snapshot_id = 'd0587e1195aed5a8800411a008f2f2d627f18e2d'
@@ -479,8 +475,7 @@
swh_config, requests_mock_datadir_visits):
package = 'org'
url = package_url(package)
- metadata_url = package_metadata_url(package)
- loader = NpmLoader(package, url, metadata_url)
+ loader = NpmLoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
@@ -539,7 +534,7 @@
def test_npm_loader_version_divergence(swh_config):
package = '@aller_shared'
url = package_url(package)
- loader = NpmLoader(package, url, package_metadata_url(package))
+ loader = NpmLoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
diff --git a/swh/loader/package/npm/tests/test_tasks.py b/swh/loader/package/npm/tests/test_tasks.py
--- a/swh/loader/package/npm/tests/test_tasks.py
+++ b/swh/loader/package/npm/tests/test_tasks.py
@@ -3,19 +3,14 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from unittest.mock import patch
-
-@patch('swh.loader.package.npm.loader.NpmLoader.load')
-def test_npm_loader(
- mock_loader, swh_app, celery_session_worker, swh_config):
+def test_npm_loader(mocker, swh_app, celery_session_worker, swh_config):
+ mock_loader = mocker.patch('swh.loader.package.npm.loader.NpmLoader.load')
mock_loader.return_value = {'status': 'eventful'}
res = swh_app.send_task(
'swh.loader.package.npm.tasks.LoadNpm',
- (), dict(package_name='some-package',
- package_url='some',
- package_metadata_url='something'))
+ (), dict(url='https://www.npmjs.com/package/some-package'))
assert res
res.wait()
assert res.successful()
diff --git a/swh/loader/package/pypi/__init__.py b/swh/loader/package/pypi/__init__.py
--- a/swh/loader/package/pypi/__init__.py
+++ b/swh/loader/package/pypi/__init__.py
@@ -9,6 +9,8 @@
def register() -> Mapping[str, Any]:
"""Register the current worker module's definition"""
+ from .loader import PyPILoader
return {
'task_modules': [f'{__name__}.tasks'],
+ 'loader': PyPILoader,
}
diff --git a/swh/loader/package/pypi/tests/test_tasks.py b/swh/loader/package/pypi/tests/test_tasks.py
--- a/swh/loader/package/pypi/tests/test_tasks.py
+++ b/swh/loader/package/pypi/tests/test_tasks.py
@@ -3,12 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from unittest.mock import patch
-
-@patch('swh.loader.package.pypi.loader.PyPILoader.load')
-def test_pypi_loader(
- mock_loader, swh_app, celery_session_worker, swh_config):
+def test_pypi_loader(mocker, swh_app, celery_session_worker, swh_config):
+ mock_loader = mocker.patch(
+ 'swh.loader.package.pypi.loader.PyPILoader.load')
mock_loader.return_value = {'status': 'eventful'}
res = swh_app.send_task(
diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py
new file mode 100644
diff --git a/swh/loader/tests/conftest.py b/swh/loader/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/tests/conftest.py
@@ -0,0 +1,24 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+from typing import Any, Dict
+
+
+@pytest.fixture
+def swh_loader_config() -> Dict[str, Any]:
+    """Loader configuration for the tests: a storage of class 'memory' and
+    the deposit url with its credentials.
+
+    """
+    deposit_credentials = {'username': 'user', 'password': 'pass'}
+    deposit_config = {
+        'url': 'https://deposit.softwareheritage.org/1/private',
+        'auth': deposit_credentials,
+    }
+    storage_config = {'cls': 'memory'}
+    return {
+        'storage': storage_config,
+        'deposit': deposit_config,
+    }
diff --git a/swh/loader/tests/test_cli.py b/swh/loader/tests/test_cli.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/tests/test_cli.py
@@ -0,0 +1,91 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+
+from swh.loader.cli import run, get_loader, SUPPORTED_LOADERS
+from swh.loader.package.loader import PackageLoader
+
+from click.testing import CliRunner
+from unittest.mock import patch
+
+
+def test_get_loader_wrong_input(swh_config):
+    """Asking for a loader that is not registered raises a ValueError
+
+    """
+    unknown_type = 'unknown'
+    assert unknown_type not in SUPPORTED_LOADERS
+    with pytest.raises(ValueError, match='Invalid loader'):
+        get_loader(unknown_type, url='db-url')
+
+
+def test_get_loader(swh_config):
+    """Each listed loader type can be instantiated from its arguments
+
+    """
+    npm_origin = 'https://www.npmjs.com/package/onepackage'
+    constructor_kwargs = [
+        ('archive', dict(url='some-url', artifacts=[])),
+        ('debian', dict(url='some-url', date='something', packages=[])),
+        ('deposit', dict(url='some-url', deposit_id=1)),
+        ('npm', dict(url=npm_origin)),
+        ('pypi', dict(url='some-url')),
+    ]
+    for loader_type, kwargs in constructor_kwargs:
+        assert isinstance(get_loader(loader_type, **kwargs), PackageLoader)
+
+
+help_msg = """Usage: run [OPTIONS] [OPTIONS]...
+
+ Loader cli tools
+
+ Load an origin from its url with loader <name>
+
+Options:
+ -t, --type [archive|debian|deposit|npm|pypi]
+ Loader to run
+ -u, --url TEXT Origin url to load
+ -h, --help Show this message and exit.
+"""
+
+
+def test_run_help(swh_config):
+    """The -h flag exits with code 0 and prints the expected help message
+
+    """
+    outcome = CliRunner().invoke(run, ['-h'])
+    assert outcome.exit_code == 0
+    assert outcome.output.startswith(help_msg)
+
+
+@patch('swh.loader.package.pypi.loader.PyPILoader')
+def test_run_pypi(mock_loader, swh_config):
+    """Triggering a load should be ok
+
+    The loader class is mocked: the test checks that the command builds the
+    loader from the cli url and then asks that instance to load.
+
+    """
+    runner = CliRunner()
+    result = runner.invoke(run, [
+        '--type', 'pypi',
+        '--url', 'https://some-url'
+    ])
+    assert result.exit_code == 0
+    mock_loader.assert_called_once_with(url='https://some-url')  # constructor
+    # The instance returned by the mocked constructor must have been asked
+    # to load; checking the constructor alone would miss a skipped load().
+    mock_loader.return_value.load.assert_called_once_with()

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 3:59 AM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218415

Event Timeline