diff --git a/PKG-INFO b/PKG-INFO index 3bceb4f..4aff2a5 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,56 +1,56 @@ Metadata-Version: 2.1 Name: swh.loader.core -Version: 1.0.0 +Version: 1.0.1 Summary: Software Heritage Base Loader Home-page: https://forge.softwareheritage.org/diffusion/DLDBASE Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-loader-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-loader-core/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE License-File: AUTHORS Software Heritage - Loader foundations ====================================== The Software Heritage Loader Core is a low-level loading utilities and helpers used by :term:`loaders `. The main entry points are classes: - :class:`swh.loader.core.loader.BaseLoader` for loaders (e.g. svn) - :class:`swh.loader.core.loader.DVCSLoader` for DVCS loaders (e.g. hg, git, ...) - :class:`swh.loader.package.loader.PackageLoader` for Package loaders (e.g. PyPI, Npm, ...) Package loaders --------------- This package also implements many package loaders directly, out of convenience, as they usually are quite similar and each fits in a single file. They all roughly follow these steps, explained in the :py:meth:`swh.loader.package.loader.PackageLoader.load` documentation. See the :ref:`package-loader-tutorial` for details. VCS loaders ----------- Unlike package loaders, VCS loaders remain in separate packages, as they often need more advanced conversions and very VCS-specific operations. This usually involves getting the branches of a repository and recursively loading revisions in the history (and directory trees in these revisions), until a known revision is found diff --git a/swh.loader.core.egg-info/PKG-INFO b/swh.loader.core.egg-info/PKG-INFO index 3bceb4f..4aff2a5 100644 --- a/swh.loader.core.egg-info/PKG-INFO +++ b/swh.loader.core.egg-info/PKG-INFO @@ -1,56 +1,56 @@ Metadata-Version: 2.1 Name: swh.loader.core -Version: 1.0.0 +Version: 1.0.1 Summary: Software Heritage Base Loader Home-page: https://forge.softwareheritage.org/diffusion/DLDBASE Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-loader-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-loader-core/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE License-File: AUTHORS Software Heritage - Loader foundations ====================================== The Software Heritage Loader Core is a low-level loading utilities and helpers used by :term:`loaders `. The main entry points are classes: - :class:`swh.loader.core.loader.BaseLoader` for loaders (e.g. svn) - :class:`swh.loader.core.loader.DVCSLoader` for DVCS loaders (e.g. hg, git, ...) - :class:`swh.loader.package.loader.PackageLoader` for Package loaders (e.g. PyPI, Npm, ...) Package loaders --------------- This package also implements many package loaders directly, out of convenience, as they usually are quite similar and each fits in a single file. They all roughly follow these steps, explained in the :py:meth:`swh.loader.package.loader.PackageLoader.load` documentation. See the :ref:`package-loader-tutorial` for details. VCS loaders ----------- Unlike package loaders, VCS loaders remain in separate packages, as they often need more advanced conversions and very VCS-specific operations. This usually involves getting the branches of a repository and recursively loading revisions in the history (and directory trees in these revisions), until a known revision is found diff --git a/swh/loader/core/utils.py b/swh/loader/core/utils.py index 89684f2..de026c2 100644 --- a/swh/loader/core/utils.py +++ b/swh/loader/core/utils.py @@ -1,48 +1,45 @@ -# Copyright (C) 2018 The Software Heritage developers +# Copyright (C) 2018-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import shutil import psutil -def clean_dangling_folders(dirpath, pattern_check, log=None): - """Clean up potential dangling temporary working folder rooted at - `dirpath`. Those folders must match a dedicated pattern and not - belonging to a live pid. +def clean_dangling_folders(dirpath: str, pattern_check: str, log=None) -> None: + """Clean up potential dangling temporary working folder rooted at `dirpath`. Those + folders must match a dedicated pattern and not belonging to a live pid. Args: - dirpath (str): Path to check for dangling files - pattern_check (str): A dedicated pattern to check on first - level directory (e.g `swh.loader.mercurial.`, - `swh.loader.svn.`) - log (Logger): Optional logger + dirpath: Path to check for dangling files + pattern_check: A dedicated pattern to check on first level directory (e.g + `swh.loader.mercurial.`, `swh.loader.svn.`) + log (Logger): Optional logger """ if not os.path.exists(dirpath): return for filename in os.listdir(dirpath): path_to_cleanup = os.path.join(dirpath, filename) try: - # pattern: `swh.loader.svn-pid.{noise}` + # pattern: `swh.loader.{loader-type}-pid.{noise}` if ( pattern_check not in filename or "-" not in filename ): # silently ignore unknown patterns continue - _, pid = filename.split("-") - pid = int(pid.split(".")[0]) + _, pid_ = filename.split("-") + pid = int(pid_.split(".")[0]) if psutil.pid_exists(pid): if log: - log.debug("PID %s is live, skipping" % pid) + log.debug("PID %s is live, skipping", pid) continue # could be removed concurrently, so check before removal if os.path.exists(path_to_cleanup): shutil.rmtree(path_to_cleanup) except Exception as e: if log: - msg = "Fail to clean dangling path %s: %s" % (path_to_cleanup, e) - log.warn(msg) + log.warn("Fail to clean dangling path %s: %s", path_to_cleanup, e) diff --git a/swh/loader/tests/test_cli.py b/swh/loader/tests/test_cli.py index cbbe27f..949d239 100644 --- a/swh/loader/tests/test_cli.py +++ b/swh/loader/tests/test_cli.py @@ -1,150 +1,152 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import os from click.formatting import HelpFormatter from click.testing import CliRunner import pytest import yaml from swh.loader.cli import SUPPORTED_LOADERS, get_loader from swh.loader.cli import loader as loader_cli from swh.loader.package.loader import PackageLoader def test_get_loader_wrong_input(swh_config): """Unsupported loader should raise """ loader_type = "unknown" assert loader_type not in SUPPORTED_LOADERS with pytest.raises(ValueError, match="Invalid loader"): get_loader(loader_type, url="db-url") def test_get_loader(swh_loader_config): """Instantiating a supported loader should be ok """ loader_input = { "archive": {"url": "some-url", "artifacts": []}, "debian": {"url": "some-url", "date": "something", "packages": [],}, "npm": {"url": "https://www.npmjs.com/package/onepackage",}, "pypi": {"url": "some-url",}, } for loader_type, kwargs in loader_input.items(): kwargs["storage"] = swh_loader_config["storage"] loader = get_loader(loader_type, **kwargs) assert isinstance(loader, PackageLoader) def _write_usage(command, args, max_width=80): hf = HelpFormatter(width=max_width) hf.write_usage(command, args) return hf.getvalue()[:-1] def test_run_help(swh_config): """Usage message should contain list of available loaders """ runner = CliRunner() result = runner.invoke(loader_cli, ["run", "-h"]) assert result.exit_code == 0 - usage_prefix = _write_usage( - "loader run", f"[OPTIONS] [{'|'.join(SUPPORTED_LOADERS)}]\n" - ) - assert result.output.startswith(usage_prefix) + + # Syntax depends on dependencies' versions + supported_loaders = "|".join(SUPPORTED_LOADERS) + usage_prefix = _write_usage("loader run", "[OPTIONS] [%s]\n" % supported_loaders) + usage_prefix2 = _write_usage("loader run", "[OPTIONS] {%s}\n" % supported_loaders) + assert result.output.startswith((usage_prefix, usage_prefix2)) def test_run_with_configuration_failure(tmp_path): """Triggering a load should fail since configuration is incomplete """ runner = CliRunner() conf_path = os.path.join(str(tmp_path), "cli.yml") with open(conf_path, "w") as f: f.write(yaml.dump({})) with pytest.raises(ValueError, match="Missing storage"): runner.invoke( loader_cli, ["-C", conf_path, "run", "pypi", "url=https://some-url",], catch_exceptions=False, ) def test_run_pypi(mocker, swh_config): """Triggering a load should be ok """ mock_loader = mocker.patch("swh.loader.package.pypi.loader.PyPILoader.load") runner = CliRunner() result = runner.invoke( loader_cli, ["-C", swh_config, "run", "pypi", "url=https://some-url",] ) assert result.exit_code == 0 mock_loader.assert_called_once_with() def test_run_with_visit_date(mocker, swh_config): """iso visit_date parameter should be parsed as datetime """ mock_loader = mocker.patch("swh.loader.cli.get_loader") runner = CliRunner() input_date = "2016-05-03 15:16:32+00" result = runner.invoke( loader_cli, ["run", "npm", "https://some-url", f"visit_date='{input_date}'"] ) assert result.exit_code == 0 expected_parsed_date = datetime.datetime( 2016, 5, 3, 15, 16, 32, tzinfo=datetime.timezone.utc ) mock_loader.assert_called_once_with( "npm", storage={"cls": "memory"}, url="https://some-url", visit_date=expected_parsed_date, ) def test_list_help(mocker, swh_config): """Usage message should contain list of available loaders """ runner = CliRunner() result = runner.invoke(loader_cli, ["list", "--help"]) assert result.exit_code == 0 usage_prefix = _write_usage( "loader list", f"[OPTIONS] [[{'|'.join(['all'] + SUPPORTED_LOADERS)}]]" ) expected_help_msg = f"""{usage_prefix} List supported loaders and optionally their arguments Options: -h, --help Show this message and exit. """ assert result.output.startswith(expected_help_msg) def test_list_help_npm(mocker, swh_config): """Triggering a load should be ok """ runner = CliRunner() result = runner.invoke(loader_cli, ["list", "npm"]) assert result.exit_code == 0 expected_help_msg = """ Loader: Load npm origin's artifact releases into swh archive. """ assert result.output.startswith(expected_help_msg[1:])