diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d0fb19c..839c429 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,42 +1,43 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.1.0 hooks: - id: trailing-whitespace - id: check-json - id: check-yaml - repo: https://gitlab.com/pycqa/flake8 rev: 4.0.1 hooks: - id: flake8 additional_dependencies: [flake8-bugbear==22.3.23] - repo: https://github.com/codespell-project/codespell rev: v2.1.0 hooks: - id: codespell name: Check source code spelling exclude: ^(swh/loader/package/.*[/]+tests/data/.*)$ + args: [-L crate] entry: codespell --ignore-words-list=iff stages: [commit] - repo: local hooks: - id: mypy name: mypy entry: mypy args: [swh] pass_filenames: false language: system types: [python] - repo: https://github.com/PyCQA/isort rev: 5.10.1 hooks: - id: isort - repo: https://github.com/python/black rev: 22.3.0 hooks: - id: black diff --git a/PKG-INFO b/PKG-INFO index 188ecbb..bc910d0 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,56 +1,56 @@ Metadata-Version: 2.1 Name: swh.loader.core -Version: 3.2.0 +Version: 3.3.0 Summary: Software Heritage Base Loader Home-page: https://forge.softwareheritage.org/diffusion/DLDBASE Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-loader-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-loader-core/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE License-File: AUTHORS Software Heritage - Loader foundations ====================================== The Software Heritage Loader Core is a low-level loading utilities and helpers used by :term:`loaders `. The main entry points are classes: - :class:`swh.loader.core.loader.BaseLoader` for loaders (e.g. svn) - :class:`swh.loader.core.loader.DVCSLoader` for DVCS loaders (e.g. hg, git, ...) - :class:`swh.loader.package.loader.PackageLoader` for Package loaders (e.g. PyPI, Npm, ...) Package loaders --------------- This package also implements many package loaders directly, out of convenience, as they usually are quite similar and each fits in a single file. They all roughly follow these steps, explained in the :py:meth:`swh.loader.package.loader.PackageLoader.load` documentation. See the :ref:`package-loader-tutorial` for details. VCS loaders ----------- Unlike package loaders, VCS loaders remain in separate packages, as they often need more advanced conversions and very VCS-specific operations. This usually involves getting the branches of a repository and recursively loading revisions in the history (and directory trees in these revisions), until a known revision is found diff --git a/docs/package-loader-specifications.rst b/docs/package-loader-specifications.rst index b53577b..caad4ac 100644 --- a/docs/package-loader-specifications.rst +++ b/docs/package-loader-specifications.rst @@ -1,124 +1,133 @@ .. _package-loader-specifications: Package loader specifications ============================= Release fields -------------- Here is an overview of the fields (+ internal version name + branch name) used by each package loader, after D6616: .. list-table:: Fields used by each package loader :header-rows: 1 * - Loader - internal version - branch name - name - message - synthetic - author - date - Notes * - archive - passed as arg - ``release_name(​version)`` - =version - "Synthetic release for archive at {p_info.url}\n" - true - "" - passed as arg - * - cran - ``metadata.get(​"Version", passed as arg)`` - ``release_name(​version)`` - =version - standard message - true - ``metadata.get(​"Maintainer", "")`` - ``metadata.get(​"Date")`` - metadata is intrinsic + * - crates + - passed as arg + - ``release_name(​version, filename)`` + - =version + - Synthetic release for Crate source package {p_info.name} version {p_info.version} {description} + - true + - from intrinsic metadata + - from extrinsic metadata + - ``i_metadata`` for intrinsic metadata, ``e_metadata`` for extrinsic metadata * - debian - =``version`` - ``release_name(​version)`` - =``i_version`` - standard message (using ``i_version``) - true - ``metadata​.changelog​.person`` - ``metadata​.changelog​.date`` - metadata is intrinsic. Old revisions have ``dsc`` as type ``i_version`` is the intrinsic version (eg. ``0.7.2-3``) while ``version`` contains the debian suite name (eg. ``stretch/contrib/0.7.2-3``) and is passed as arg * - deposit - HEAD - only HEAD - HEAD - "{client}: Deposit {id} in collection {collection}\n" - true - original author - ```` from SWORD XML - revisions had parents * - maven-loader - passed as arg - HEAD - ``release_name(version)`` - "Synthetic release for archive at {p_info.url}\n" - true - "" - passed as arg - Only one artefact per url (jar/zip src) * - nixguix - URL - URL - URL - None - true - "" - None - it's the URL of the artifact referenced by the derivation * - npm - ``metadata​["version"]`` - ``release_name(​version)`` - =version - standard message - true - from int metadata or "" - from ext metadata or None - * - opam - as given by opam - "{opam_package}​.{version}" - =version - standard message - true - from metadata - None - "{self.opam_package}​.{version}" matches the version names used by opam's backend. metadata is extrinsic * - pypi - ``metadata​["version"]`` - ``release_name(​version)`` or ``release_name(​version, filename)`` - =version - ``metadata[​'comment_text']}`` or standard message - true - from int metadata or "" - from ext metadata or None - metadata is intrinsic using this function:: def release_name(version: str, filename: Optional[str] = None) -> str: if filename: return "releases/%s/%s" % (version, filename) return "releases/%s" % version and "standard message" being:: msg = ( f"Synthetic release for {PACKAGE_MANAGER} source package {name} " f"version {version}\n" ) The ``target_type`` field is always ``dir``, and the target the id of a directory loaded by unpacking a tarball/zip file/... diff --git a/requirements.txt b/requirements.txt index 3be0d4a..644a170 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,12 @@ # Add here external Python modules dependencies, one per line. Module names # should match https://pypi.python.org/pypi names. For the full spec or # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html psutil requests iso8601 pkginfo python-debian python-dateutil typing-extensions +toml diff --git a/setup.py b/setup.py index 81f0481..a4f4d95 100755 --- a/setup.py +++ b/setup.py @@ -1,81 +1,82 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from io import open from os import path from setuptools import find_packages, setup here = path.abspath(path.dirname(__file__)) # Get the long description from the README file with open(path.join(here, "README.rst"), encoding="utf-8") as f: long_description = f.read() def parse_requirements(name=None): if name: reqf = "requirements-%s.txt" % name else: reqf = "requirements.txt" requirements = [] if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith("#"): continue requirements.append(line) return requirements setup( name="swh.loader.core", description="Software Heritage Base Loader", long_description=long_description, long_description_content_type="text/markdown", python_requires=">=3.7", author="Software Heritage developers", author_email="swh-devel@inria.fr", url="https://forge.softwareheritage.org/diffusion/DLDBASE", packages=find_packages(), # packages's modules scripts=[], # scripts to package install_requires=parse_requirements() + parse_requirements("swh"), setup_requires=["setuptools-scm"], use_scm_version=True, extras_require={"testing": parse_requirements("test")}, include_package_data=True, entry_points=""" [swh.cli.subcommands] loader=swh.loader.cli [swh.workers] loader.archive=swh.loader.package.archive:register loader.cran=swh.loader.package.cran:register + loader.crates=swh.loader.package.crates:register loader.debian=swh.loader.package.debian:register loader.deposit=swh.loader.package.deposit:register loader.nixguix=swh.loader.package.nixguix:register loader.npm=swh.loader.package.npm:register loader.opam=swh.loader.package.opam:register loader.pypi=swh.loader.package.pypi:register loader.maven=swh.loader.package.maven:register """, classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 5 - Production/Stable", ], project_urls={ "Bug Reports": "https://forge.softwareheritage.org/maniphest", "Funding": "https://www.softwareheritage.org/donate", "Source": "https://forge.softwareheritage.org/source/swh-loader-core", "Documentation": "https://docs.softwareheritage.org/devel/swh-loader-core/", }, ) diff --git a/swh.loader.core.egg-info/PKG-INFO b/swh.loader.core.egg-info/PKG-INFO index 188ecbb..bc910d0 100644 --- a/swh.loader.core.egg-info/PKG-INFO +++ b/swh.loader.core.egg-info/PKG-INFO @@ -1,56 +1,56 @@ Metadata-Version: 2.1 Name: swh.loader.core -Version: 3.2.0 +Version: 3.3.0 Summary: Software Heritage Base Loader Home-page: https://forge.softwareheritage.org/diffusion/DLDBASE Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-loader-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-loader-core/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE License-File: AUTHORS Software Heritage - Loader foundations ====================================== The Software Heritage Loader Core is a low-level loading utilities and helpers used by :term:`loaders `. The main entry points are classes: - :class:`swh.loader.core.loader.BaseLoader` for loaders (e.g. svn) - :class:`swh.loader.core.loader.DVCSLoader` for DVCS loaders (e.g. hg, git, ...) - :class:`swh.loader.package.loader.PackageLoader` for Package loaders (e.g. PyPI, Npm, ...) Package loaders --------------- This package also implements many package loaders directly, out of convenience, as they usually are quite similar and each fits in a single file. They all roughly follow these steps, explained in the :py:meth:`swh.loader.package.loader.PackageLoader.load` documentation. See the :ref:`package-loader-tutorial` for details. VCS loaders ----------- Unlike package loaders, VCS loaders remain in separate packages, as they often need more advanced conversions and very VCS-specific operations. This usually involves getting the branches of a repository and recursively loading revisions in the history (and directory trees in these revisions), until a known revision is found diff --git a/swh.loader.core.egg-info/SOURCES.txt b/swh.loader.core.egg-info/SOURCES.txt index 9cae0d4..c2042ba 100644 --- a/swh.loader.core.egg-info/SOURCES.txt +++ b/swh.loader.core.egg-info/SOURCES.txt @@ -1,220 +1,238 @@ .git-blame-ignore-revs .gitignore .pre-commit-config.yaml AUTHORS CODE_OF_CONDUCT.md CONTRIBUTORS LICENSE MANIFEST.in Makefile README.rst conftest.py mypy.ini pyproject.toml pytest.ini requirements-swh.txt requirements-test.txt requirements.txt setup.cfg setup.py tox.ini docs/.gitignore docs/Makefile docs/README.rst docs/cli.rst docs/conf.py docs/index.rst docs/package-loader-specifications.rst docs/package-loader-tutorial.rst docs/vcs-loader-overview.rst docs/_static/.placeholder docs/_templates/.placeholder swh/__init__.py swh.loader.core.egg-info/PKG-INFO swh.loader.core.egg-info/SOURCES.txt swh.loader.core.egg-info/dependency_links.txt swh.loader.core.egg-info/entry_points.txt swh.loader.core.egg-info/requires.txt swh.loader.core.egg-info/top_level.txt swh/loader/__init__.py swh/loader/cli.py swh/loader/exception.py swh/loader/pytest_plugin.py swh/loader/core/__init__.py swh/loader/core/converters.py swh/loader/core/loader.py swh/loader/core/metadata_fetchers.py swh/loader/core/py.typed swh/loader/core/utils.py swh/loader/core/tests/__init__.py swh/loader/core/tests/test_converters.py swh/loader/core/tests/test_loader.py swh/loader/core/tests/test_utils.py swh/loader/package/__init__.py swh/loader/package/loader.py swh/loader/package/py.typed swh/loader/package/utils.py swh/loader/package/archive/__init__.py swh/loader/package/archive/loader.py swh/loader/package/archive/tasks.py swh/loader/package/archive/tests/__init__.py swh/loader/package/archive/tests/test_archive.py swh/loader/package/archive/tests/test_tasks.py swh/loader/package/archive/tests/data/not_gzipped_tarball.tar.gz swh/loader/package/archive/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz swh/loader/package/archive/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz_visit1 swh/loader/package/archive/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz_visit2 swh/loader/package/archive/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.2.0.tar.gz swh/loader/package/cran/__init__.py swh/loader/package/cran/loader.py swh/loader/package/cran/tasks.py swh/loader/package/cran/tests/__init__.py swh/loader/package/cran/tests/test_cran.py swh/loader/package/cran/tests/test_tasks.py swh/loader/package/cran/tests/data/description/KnownBR swh/loader/package/cran/tests/data/description/acepack swh/loader/package/cran/tests/data/https_cran.r-project.org/src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz +swh/loader/package/crates/__init__.py +swh/loader/package/crates/loader.py +swh/loader/package/crates/tasks.py +swh/loader/package/crates/tests/__init__.py +swh/loader/package/crates/tests/test_crates.py +swh/loader/package/crates/tests/test_tasks.py +swh/loader/package/crates/tests/data/fake_crates.sh +swh/loader/package/crates/tests/data/https_crates.io/api_v1_crates_hg-core +swh/loader/package/crates/tests/data/https_crates.io/api_v1_crates_micro-timer +swh/loader/package/crates/tests/data/https_static.crates.io/crates_hg-core_hg-core-0.0.1.crate +swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.1.0.crate +swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.1.1.crate +swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.1.2.crate +swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.2.0.crate +swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.2.1.crate +swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.3.0.crate +swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.3.1.crate +swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.4.0.crate swh/loader/package/debian/__init__.py swh/loader/package/debian/loader.py swh/loader/package/debian/tasks.py swh/loader/package/debian/tests/__init__.py swh/loader/package/debian/tests/test_debian.py swh/loader/package/debian/tests/test_tasks.py swh/loader/package/debian/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-3.diff.gz swh/loader/package/debian/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-3.dsc swh/loader/package/debian/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-4.diff.gz swh/loader/package/debian/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-4.dsc swh/loader/package/debian/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2.orig.tar.gz swh/loader/package/debian/tests/data/http_deb.debian.org/onefile.txt swh/loader/package/deposit/__init__.py swh/loader/package/deposit/loader.py swh/loader/package/deposit/tasks.py swh/loader/package/deposit/tests/__init__.py swh/loader/package/deposit/tests/conftest.py swh/loader/package/deposit/tests/test_deposit.py swh/loader/package/deposit/tests/test_tasks.py swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_666_meta swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_666_raw swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_777_meta swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_777_raw swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_888_meta swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_888_raw swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_999_meta swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_999_raw swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello-2.10.zip swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello-2.12.tar.gz swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello_2.10.json swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello_2.11.json swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello_2.12.json swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello_2.13.json swh/loader/package/maven/__init__.py swh/loader/package/maven/loader.py swh/loader/package/maven/tasks.py swh/loader/package/maven/tests/__init__.py swh/loader/package/maven/tests/test_maven.py swh/loader/package/maven/tests/test_tasks.py swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.0-sources.jar swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.0.pom swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.1-sources.jar swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.1.pom swh/loader/package/nixguix/__init__.py swh/loader/package/nixguix/loader.py swh/loader/package/nixguix/tasks.py swh/loader/package/nixguix/tests/__init__.py swh/loader/package/nixguix/tests/conftest.py swh/loader/package/nixguix/tests/test_nixguix.py swh/loader/package/nixguix/tests/test_tasks.py swh/loader/package/nixguix/tests/data/https_example.com/file.txt swh/loader/package/nixguix/tests/data/https_fail.com/truncated-archive.tgz swh/loader/package/nixguix/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz swh/loader/package/nixguix/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz_visit1 swh/loader/package/nixguix/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz_visit2 swh/loader/package/nixguix/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.2.0.tar.gz swh/loader/package/nixguix/tests/data/https_github.com/owner-1_repository-1_revision-1.tgz swh/loader/package/nixguix/tests/data/https_github.com/owner-2_repository-1_revision-1.tgz swh/loader/package/nixguix/tests/data/https_github.com/owner-3_repository-1_revision-1.tgz swh/loader/package/nixguix/tests/data/https_nix-community.github.io/nixpkgs-swh_sources-EOFError.json swh/loader/package/nixguix/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json swh/loader/package/nixguix/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json_visit1 swh/loader/package/nixguix/tests/data/https_nix-community.github.io/nixpkgs-swh_sources_special.json swh/loader/package/nixguix/tests/data/https_nix-community.github.io/nixpkgs-swh_sources_special.json_visit1 swh/loader/package/npm/__init__.py swh/loader/package/npm/loader.py swh/loader/package/npm/tasks.py swh/loader/package/npm/tests/__init__.py swh/loader/package/npm/tests/test_npm.py swh/loader/package/npm/tests/test_tasks.py swh/loader/package/npm/tests/data/https_registry.npmjs.org/@aller_shared_-_shared-0.1.0.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/@aller_shared_-_shared-0.1.1-alpha.14.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/jammit-express_-_jammit-express-0.0.1.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/nativescript-telerik-analytics_-_nativescript-telerik-analytics-1.0.0.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.0.2.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.0.3-beta.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.0.3.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.0.4.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.0.5.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.1.0.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.2.0.tgz swh/loader/package/npm/tests/data/https_replicate.npmjs.com/@aller_shared swh/loader/package/npm/tests/data/https_replicate.npmjs.com/catify swh/loader/package/npm/tests/data/https_replicate.npmjs.com/jammit-express swh/loader/package/npm/tests/data/https_replicate.npmjs.com/jammit-no-time swh/loader/package/npm/tests/data/https_replicate.npmjs.com/nativescript-telerik-analytics swh/loader/package/npm/tests/data/https_replicate.npmjs.com/org swh/loader/package/npm/tests/data/https_replicate.npmjs.com/org_version_mismatch swh/loader/package/npm/tests/data/https_replicate.npmjs.com/org_visit1 swh/loader/package/opam/__init__.py swh/loader/package/opam/loader.py swh/loader/package/opam/tasks.py swh/loader/package/opam/tests/__init__.py swh/loader/package/opam/tests/test_opam.py swh/loader/package/opam/tests/test_tasks.py swh/loader/package/opam/tests/data/fake_opam_repo/_repo swh/loader/package/opam/tests/data/fake_opam_repo/version swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/lock swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/repos-config swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/agrid/agrid.0.1/opam swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/directories/directories.0.1/opam swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/directories/directories.0.2/opam swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/directories/directories.0.3/opam swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/ocb/ocb.0.1/opam swh/loader/package/opam/tests/data/https_github.com/OCamlPro_agrid_archive_0.1.tar.gz swh/loader/package/opam/tests/data/https_github.com/OCamlPro_directories_archive_0.1.tar.gz swh/loader/package/opam/tests/data/https_github.com/OCamlPro_directories_archive_0.2.tar.gz swh/loader/package/opam/tests/data/https_github.com/OCamlPro_directories_archive_0.3.tar.gz swh/loader/package/opam/tests/data/https_github.com/OCamlPro_ocb_archive_0.1.tar.gz swh/loader/package/pypi/__init__.py swh/loader/package/pypi/loader.py swh/loader/package/pypi/tasks.py swh/loader/package/pypi/tests/__init__.py swh/loader/package/pypi/tests/test_pypi.py swh/loader/package/pypi/tests/test_tasks.py swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/0805nexter-1.1.0.tar.gz swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/0805nexter-1.1.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/0805nexter-1.2.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/0805nexter-1.3.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/0805nexter-1.4.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/nexter-1.1.0.tar.gz swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/nexter-1.1.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/packages_70_97_c49fb8ec24a7aaab54c3dbfbb5a6ca1431419d9ee0f6c363d9ad01d2b8b1_0805nexter-1.3.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/packages_86_10_c9555ec63106153aaaad753a281ff47f4ac79e980ff7f5d740d6649cd56a_upymenu-0.0.1.tar.gz swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/packages_c4_a0_4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4_0805nexter-1.2.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/packages_c4_a0_4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4_0805nexter-1.2.0.zip_visit1 swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/packages_ec_65_c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d_0805nexter-1.1.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/packages_ec_65_c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d_0805nexter-1.1.0.zip_visit1 swh/loader/package/pypi/tests/data/https_pypi.org/pypi_0805nexter_json swh/loader/package/pypi/tests/data/https_pypi.org/pypi_0805nexter_json_visit1 swh/loader/package/pypi/tests/data/https_pypi.org/pypi_nexter_json swh/loader/package/pypi/tests/data/https_pypi.org/pypi_upymenu_json swh/loader/package/tests/__init__.py swh/loader/package/tests/common.py swh/loader/package/tests/test_conftest.py swh/loader/package/tests/test_loader.py swh/loader/package/tests/test_loader_metadata.py swh/loader/package/tests/test_utils.py swh/loader/tests/__init__.py swh/loader/tests/conftest.py swh/loader/tests/py.typed swh/loader/tests/test_cli.py swh/loader/tests/test_init.py swh/loader/tests/data/0805nexter-1.1.0.tar.gz \ No newline at end of file diff --git a/swh.loader.core.egg-info/entry_points.txt b/swh.loader.core.egg-info/entry_points.txt index 442b094..2c2ca76 100644 --- a/swh.loader.core.egg-info/entry_points.txt +++ b/swh.loader.core.egg-info/entry_points.txt @@ -1,13 +1,14 @@ [swh.cli.subcommands] loader = swh.loader.cli [swh.workers] loader.archive = swh.loader.package.archive:register loader.cran = swh.loader.package.cran:register +loader.crates = swh.loader.package.crates:register loader.debian = swh.loader.package.debian:register loader.deposit = swh.loader.package.deposit:register loader.maven = swh.loader.package.maven:register loader.nixguix = swh.loader.package.nixguix:register loader.npm = swh.loader.package.npm:register loader.opam = swh.loader.package.opam:register loader.pypi = swh.loader.package.pypi:register diff --git a/swh.loader.core.egg-info/requires.txt b/swh.loader.core.egg-info/requires.txt index 1768888..3aebd1f 100644 --- a/swh.loader.core.egg-info/requires.txt +++ b/swh.loader.core.egg-info/requires.txt @@ -1,24 +1,25 @@ psutil requests iso8601 pkginfo python-debian python-dateutil typing-extensions +toml swh.core>=0.3 swh.model>=4.4.0 swh.objstorage>=0.2.2 swh.scheduler>=0.4.0 swh.storage>=0.29.0 [testing] pytest pytest-mock requests_mock swh-core[testing] swh-scheduler[testing]>=0.5.0 swh-storage[testing]>=0.10.6 types-click types-python-dateutil types-pyyaml types-requests diff --git a/swh/loader/package/archive/tasks.py b/swh/loader/package/archive/tasks.py index 727cffd..31a7b58 100644 --- a/swh/loader/package/archive/tasks.py +++ b/swh/loader/package/archive/tasks.py @@ -1,17 +1,15 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from celery import shared_task from swh.loader.package.archive.loader import ArchiveLoader @shared_task(name=__name__ + ".LoadArchive") -def load_archive_files(*, url=None, artifacts=None, snapshot_append=False): +def load_archive_files(**kwargs): """Load archive's artifacts (e.g gnu, etc...)""" - loader = ArchiveLoader.from_configfile( - url=url, artifacts=artifacts, snapshot_append=snapshot_append - ) + loader = ArchiveLoader.from_configfile(**kwargs) return loader.load() diff --git a/swh/loader/package/archive/tests/test_tasks.py b/swh/loader/package/archive/tests/test_tasks.py index 74e1967..0d2448f 100644 --- a/swh/loader/package/archive/tests/test_tasks.py +++ b/swh/loader/package/archive/tests/test_tasks.py @@ -1,38 +1,92 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import uuid + +import pytest + +from swh.scheduler.model import ListedOrigin, Lister +from swh.scheduler.utils import create_origin_task_dict + + +@pytest.fixture(autouse=True) +def celery_worker_and_swh_config(swh_scheduler_celery_worker, swh_config): + pass + + +@pytest.fixture +def archive_lister(): + return Lister(name="archive-lister", instance_name="example", id=uuid.uuid4()) + + +@pytest.fixture +def archive_listed_origin(archive_lister): + return ListedOrigin( + lister_id=archive_lister.id, + url="https://example.org/archives", + visit_type="tar", + extra_loader_arguments={ + "artifacts": [], + "snapshot_append": True, + }, + ) + def test_tasks_archive_loader( - mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config + mocker, + swh_scheduler_celery_app, ): mock_load = mocker.patch("swh.loader.package.archive.loader.ArchiveLoader.load") mock_load.return_value = {"status": "eventful"} res = swh_scheduler_celery_app.send_task( "swh.loader.package.archive.tasks.LoadArchive", kwargs=dict(url="https://gnu.org/", artifacts=[]), ) assert res res.wait() assert res.successful() assert mock_load.called assert res.result == {"status": "eventful"} def test_tasks_archive_loader_snapshot_append( - mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config + mocker, + swh_scheduler_celery_app, ): mock_load = mocker.patch("swh.loader.package.archive.loader.ArchiveLoader.load") mock_load.return_value = {"status": "eventful"} res = swh_scheduler_celery_app.send_task( "swh.loader.package.archive.tasks.LoadArchive", kwargs=dict(url="https://gnu.org/", artifacts=[], snapshot_append=True), ) assert res res.wait() assert res.successful() assert mock_load.called assert res.result == {"status": "eventful"} + + +def test_tasks_archive_loader_for_listed_origin( + mocker, + swh_scheduler_celery_app, + archive_lister, + archive_listed_origin, +): + mock_load = mocker.patch("swh.loader.package.archive.loader.ArchiveLoader.load") + mock_load.return_value = {"status": "eventful"} + + task_dict = create_origin_task_dict(archive_listed_origin, archive_lister) + + res = swh_scheduler_celery_app.send_task( + "swh.loader.package.archive.tasks.LoadArchive", + kwargs=task_dict["arguments"]["kwargs"], + ) + assert res + res.wait() + assert res.successful() + assert mock_load.called + assert res.result == {"status": "eventful"} diff --git a/swh/loader/package/cran/tasks.py b/swh/loader/package/cran/tasks.py index ac5c53a..8fa82f8 100644 --- a/swh/loader/package/cran/tasks.py +++ b/swh/loader/package/cran/tasks.py @@ -1,14 +1,14 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from celery import shared_task from swh.loader.package.cran.loader import CRANLoader @shared_task(name=__name__ + ".LoadCRAN") -def load_cran(url=None, artifacts=[]): +def load_cran(**kwargs): """Load CRAN's artifacts""" - return CRANLoader.from_configfile(url=url, artifacts=artifacts).load() + return CRANLoader.from_configfile(**kwargs).load() diff --git a/swh/loader/package/cran/tests/test_tasks.py b/swh/loader/package/cran/tests/test_tasks.py index ae8a604..e2b5de2 100644 --- a/swh/loader/package/cran/tests/test_tasks.py +++ b/swh/loader/package/cran/tests/test_tasks.py @@ -1,24 +1,76 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import uuid + +import pytest + +from swh.scheduler.model import ListedOrigin, Lister +from swh.scheduler.utils import create_origin_task_dict + + +@pytest.fixture(autouse=True) +def celery_worker_and_swh_config(swh_scheduler_celery_worker, swh_config): + pass + + +@pytest.fixture +def cran_lister(): + return Lister(name="cran-lister", instance_name="example", id=uuid.uuid4()) + + +@pytest.fixture +def cran_listed_origin(cran_lister): + return ListedOrigin( + lister_id=cran_lister.id, + url="https://cran.example.org/project", + visit_type="cran", + extra_loader_arguments={ + "artifacts": [{"version": "1.2.3", "url": "artifact-url"}], + }, + ) + def test_tasks_cran_loader( - mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config + mocker, + swh_scheduler_celery_app, ): mock_load = mocker.patch("swh.loader.package.cran.loader.CRANLoader.load") mock_load.return_value = {"status": "eventful"} res = swh_scheduler_celery_app.send_task( "swh.loader.package.cran.tasks.LoadCRAN", kwargs=dict( url="some-url", artifacts=[{"version": "1.2.3", "url": "artifact-url"}], ), ) assert res res.wait() assert res.successful() assert mock_load.called assert res.result == {"status": "eventful"} + + +def test_tasks_cran_loader_for_listed_origin( + mocker, + swh_scheduler_celery_app, + cran_lister, + cran_listed_origin, +): + mock_load = mocker.patch("swh.loader.package.cran.loader.CRANLoader.load") + mock_load.return_value = {"status": "eventful"} + + task_dict = create_origin_task_dict(cran_listed_origin, cran_lister) + + res = swh_scheduler_celery_app.send_task( + "swh.loader.package.cran.tasks.LoadCRAN", + kwargs=task_dict["arguments"]["kwargs"], + ) + assert res + res.wait() + assert res.successful() + assert mock_load.called + assert res.result == {"status": "eventful"} diff --git a/swh/loader/package/crates/__init__.py b/swh/loader/package/crates/__init__.py new file mode 100644 index 0000000..ec7214c --- /dev/null +++ b/swh/loader/package/crates/__init__.py @@ -0,0 +1,17 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +from typing import Any, Mapping + + +def register() -> Mapping[str, Any]: + """Register the current worker module's definition""" + from .loader import CratesLoader + + return { + "task_modules": [f"{__name__}.tasks"], + "loader": CratesLoader, + } diff --git a/swh/loader/package/crates/loader.py b/swh/loader/package/crates/loader.py new file mode 100644 index 0000000..4b59015 --- /dev/null +++ b/swh/loader/package/crates/loader.py @@ -0,0 +1,355 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from distutils.version import StrictVersion +import json +from pathlib import Path +from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple +from urllib.parse import urlparse + +import attr +import toml +from typing_extensions import TypedDict + +from swh.loader.package.loader import BasePackageInfo, PackageLoader +from swh.loader.package.utils import api_info, cached_method, release_name +from swh.model.model import ObjectType, Person, Release, Sha1Git, TimestampWithTimezone +from swh.storage.interface import StorageInterface + + +class ExtrinsicPackageMetadata(TypedDict): + """Data structure for package extrinsic metadata pulled from http api endpoint. + + We set only the keys we need according to what is available when querying + https://crates.io/api/v1/crates/, where `name` is the name of the crate + package. + + Json response example for https://crates.io/api/v1/crates/hg-core : + + .. literalinclude:: ../../swh/loader/package/crates/tests/data/https_crates.io/api_v1_crates_hg-core + + Usage example: + + .. code-block:: python + + e_metadata = ExtrinsicPackageMetadata(**self.info()) + + """ # noqa + + categories: List[Dict[Any, Any]] + """Related categories""" + + crate: Dict[Any, Any] + """Crate project information""" + + keywords: List[Any] + """Keywords""" + + versions: List[Dict[Any, Any]] + """A list of released versions for a crate""" + + +class ExtrinsicVersionPackageMetadata(TypedDict): + """Data structure for specific package version extrinsic metadata, pulled + from http api endpoint. + + Similar to `ExtrinsicPackageMetadata` in its usage, but we flatten the data + related to a specific version. + """ + + crate: str + """The package name""" + + crate_size: int + """The package size""" + + created_at: str + """First released at""" + + downloads: str + """Number of downloads""" + + license: str + """Package license""" + + num: str + """Package version""" + + published_by: Dict[Any, Any] + """Publishers information""" + + updated_at: str + """Last update""" + + yanked: bool + """Is that version yanked? (yanked means release-level deprecation)""" + + +class IntrinsicPackageMetadata(TypedDict): + """Data structure for specific package version intrinsic metadata. + + Data is extracted from the crate package's .toml file. Then the data of the + 'package' entry is flattened. + + Cargo.toml file content example: + + .. code-block:: toml + + [package] + name = "hg-core" + version = "0.0.1" + authors = ["Georges Racinet "] + description = "Mercurial pure Rust core library, with no assumption on + Python bindings (FFI)" + homepage = "https://mercurial-scm.org" + license = "GPL-2.0-or-later" + repository = "https://www.mercurial-scm.org/repo/hg" + + [lib] + name = "hg" + [dev-dependencies.rand] + version = "~0.6" + + [dev-dependencies.rand_pcg] + version = "~0.1" + + :param toml: toml object + """ + + name: str + """The package name""" + + version: str + """Package version""" + + authors: List[str] + """Authors""" + + description: str + """Package and release description""" + + homepage: str + """Homepage of the project""" + + license: str + """Package license""" + + repository: str + """Source code repository""" + + +@attr.s +class CratesPackageInfo(BasePackageInfo): + + name = attr.ib(type=str) + """Name of the package""" + + version = attr.ib(type=str) + """Current version""" + + e_metadata: Dict[str, Any] = attr.ib(factory=ExtrinsicPackageMetadata) + """Extrinsic package metadata, common to all versions""" + + e_metadata_version: Dict[str, Any] = attr.ib( + factory=ExtrinsicVersionPackageMetadata + ) + """Extrinsic package metadata specific to a version""" + + i_metadata: Dict[str, Any] = attr.ib(factory=IntrinsicPackageMetadata) + """Intrinsic metadata of the current package version""" + + +def extract_intrinsic_metadata(dir_path: Path) -> Dict[str, Any]: + """Extract intrinsic metadata from Cargo.toml file at dir_path. + + Each crate archive has a Cargo.toml at the root of the archive. + + Args: + dir_path: A directory on disk where a Cargo.toml must be present + + Returns: + A dict mapping from toml parser + """ + return toml.load(dir_path / "Cargo.toml") + + +def extract_author(p_info: CratesPackageInfo) -> Person: + """Extract package author from intrinsic metadata and return it as a + `Person` model. + + Args: + p_info: CratesPackageInfo that should contains i_metadata entries + + Returns: + Only one author (Person) of the package. Currently limited by internal detail + of the swh stack (see T3887). + """ + authors = p_info.i_metadata["authors"] + fullname = authors[0] # TODO: here we have a list of author, see T3887 + return Person.from_fullname(fullname.encode()) + + +def extract_description(p_info: CratesPackageInfo) -> str: + """Extract package description from intrinsic metadata and return it as a + string. + + Args: + p_info: CratesPackageInfo that should contains i_metadata and entries + + Returns: + Package description from metadata. + """ + return p_info.i_metadata["description"] + + +class CratesLoader(PackageLoader[CratesPackageInfo]): + """Load Crates package origins into swh archive.""" + + visit_type = "crates" + + def __init__( + self, + storage: StorageInterface, + url: str, + package_name: str, + version: str, + checksum: Optional[str] = None, + max_content_size: Optional[int] = None, + ): + """Constructor + + Args: + + url : str + Origin url (e.g. + https://static.crates.io/crates//-.crate) + + package_name : str + Crate package name + + version : str + Crate package version + + checksum : str, optional + Checksum for the package file to download + """ + super().__init__(storage=storage, url=url, max_content_size=max_content_size) + self.name = package_name + self.provider_url = f"https://crates.io/api/v1/crates/{self.name}" + # Check consistency of name, version, url + filename = urlparse(url).path.split("/")[-1] + assert f"{self.name}-{version}.crate" == filename + + @cached_method + def _raw_info(self) -> bytes: + """Get crate metadata (fetched from http api endpoint set as self.provider_url) + + Returns: + Content response as bytes. Content response is a json document. + """ + return api_info(self.provider_url) + + @cached_method + def info(self) -> Dict: + """Parse http api json response and return the crate metadata information + as a Dict.""" + return json.loads(self._raw_info()) + + def get_versions(self) -> Sequence[str]: + """Get all released versions of a crate + + Returns: + A sequence of versions + + Example:: + + ["0.1.1", "0.10.2"] + """ + versions = [version["num"] for version in self.info()["versions"]] + versions.sort(key=StrictVersion) + return versions + + def get_default_version(self) -> str: + """Get the newest release version of a crate + + Returns: + A string representing a version + + Example:: + + "0.1.2" + """ + return self.info()["crate"]["newest_version"] + + def get_package_info(self, version: str) -> Iterator[Tuple[str, CratesPackageInfo]]: + """Get release name and package information from version + + Args: + version: crate version (e.g: "0.1.0") + + Returns: + Iterator of tuple (release_name, p_info) + """ + filename = f"{self.name}-{version}.crate" + url = f"https://static.crates.io/crates/{self.name}/{self.name}-{version}.crate" + + # Get extrinsic metadata from http api + + # Raw crate info + e_metadata = ExtrinsicPackageMetadata(**self.info()) # type: ignore[misc] + # Extract crate info for current version (One .crate file for a given version) + (crate_version,) = [ + crate for crate in e_metadata["versions"] if crate["num"] == version + ] + e_metadata_version = ExtrinsicVersionPackageMetadata( # type: ignore[misc] + **crate_version + ) + + p_info = CratesPackageInfo( + name=self.name, + filename=filename, + url=url, + version=version, + e_metadata=e_metadata, + e_metadata_version=e_metadata_version, + ) + yield release_name(version, filename), p_info + + def build_release( + self, p_info: CratesPackageInfo, uncompressed_path: str, directory: Sha1Git + ) -> Optional[Release]: + # Extract intrinsic metadata from dir_path/Cargo.toml + name = p_info.name + version = p_info.version + dir_path = Path(uncompressed_path, f"{name}-{version}") + i_metadata_raw = extract_intrinsic_metadata(dir_path) + # Get only corresponding key of IntrinsicPackageMetadata + i_metadata_keys = [k for k in IntrinsicPackageMetadata.__annotations__.keys()] + # We use data only from "package" entry + + i_metadata = { + k: v for k, v in i_metadata_raw["package"].items() if k in i_metadata_keys + } + p_info.i_metadata = IntrinsicPackageMetadata(**i_metadata) # type: ignore[misc] + + author = extract_author(p_info) + description = extract_description(p_info) + message = ( + f"Synthetic release for Crate source package {p_info.name} " + f"version {p_info.version}\n" + f"{description}\n" + ) + # The only way to get a value for updated_at is through extrinsic metadata + updated_at = p_info.e_metadata_version.get("updated_at") + + return Release( + name=version.encode(), + author=author, + date=TimestampWithTimezone.from_iso8601(updated_at), + message=message.encode(), + target_type=ObjectType.DIRECTORY, + target=directory, + synthetic=True, + ) diff --git a/swh/loader/package/crates/tasks.py b/swh/loader/package/crates/tasks.py new file mode 100644 index 0000000..2d774c9 --- /dev/null +++ b/swh/loader/package/crates/tasks.py @@ -0,0 +1,16 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from celery import shared_task + +from swh.loader.package.crates.loader import CratesLoader + + +@shared_task(name=__name__ + ".LoadCrates") +def load_crates(*, url=None, package_name: str, version: str, checksum=None): + """Load Rust crate package""" + return CratesLoader.from_configfile( + url=url, package_name=package_name, version=version, checksum=checksum + ).load() diff --git a/swh/loader/package/crates/tests/__init__.py b/swh/loader/package/crates/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swh/loader/package/crates/tests/data/fake_crates.sh b/swh/loader/package/crates/tests/data/fake_crates.sh new file mode 100644 index 0000000..9bfff25 --- /dev/null +++ b/swh/loader/package/crates/tests/data/fake_crates.sh @@ -0,0 +1,259 @@ +#!/usr/bin/env bash + +# Script to generate fake crates files and fake http api response. + +set -euo pipefail + +# files and directories +mkdir -p tmp_dir/crates/ +mkdir tmp_dir/crates/hg-core-0.0.1 +mkdir tmp_dir/crates/micro-timer-0.1.0 +mkdir tmp_dir/crates/micro-timer-0.1.1 +mkdir tmp_dir/crates/micro-timer-0.1.2 +mkdir tmp_dir/crates/micro-timer-0.2.0 +mkdir tmp_dir/crates/micro-timer-0.2.1 +mkdir tmp_dir/crates/micro-timer-0.3.0 +mkdir tmp_dir/crates/micro-timer-0.3.1 +mkdir tmp_dir/crates/micro-timer-0.4.0 + + +cd tmp_dir/crates/ + +# Creates some -.crate file for test purposes. + +# hg-core-0.0.1/Cargo.toml +echo -e '''[package] +name = "hg-core" +version = "0.0.1" +authors = ["Georges Racinet "] +description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)" +homepage = "https://mercurial-scm.org" +license = "GPL-2.0-or-later" +repository = "https://www.mercurial-scm.org/repo/hg" + +[lib] +name = "hg" +[dev-dependencies.rand] +version = "~0.6" + +[dev-dependencies.rand_pcg] +version = "~0.1" +''' > hg-core-0.0.1/Cargo.toml + +# micro-timer-0.1.0/Cargo.toml +echo -e '''[package] +edition = "2018" +name = "micro-timer" +version = "0.1.0" +authors = ["Raphaël Gomès "] +description = "Dumb tiny logging timer" +homepage = "https://heptapod.octobus.net/Alphare/micro-timer" +readme = "README.md" +license-file = "LICENCE" +repository = "https://heptapod.octobus.net/Alphare/micro-timer" + +[lib] +proc-macro = true +[dependencies.quote] +version = "1.0.2" + +[dependencies.syn] +version = "1.0.16" +features = ["full", "extra-traits"] +[dev-dependencies.log] +version = "0.4.8" +''' > micro-timer-0.1.0/Cargo.toml + +# micro-timer-0.1.1/Cargo.toml +echo -e '''[package] +edition = "2018" +name = "micro-timer" +version = "0.1.1" +authors = ["Raphaël Gomès "] +description = "Dumb tiny logging timer" +homepage = "https://heptapod.octobus.net/Alphare/micro-timer" +readme = "README.md" +license-file = "LICENCE" +repository = "https://heptapod.octobus.net/Alphare/micro-timer" + +[lib] +proc-macro = true +[dependencies.quote] +version = "1.0.2" + +[dependencies.syn] +version = "1.0.16" +features = ["full", "extra-traits"] +[dev-dependencies.log] +version = "0.4.8" +''' > micro-timer-0.1.1/Cargo.toml + +# micro-timer-0.1.2/Cargo.toml +echo -e '''[package] +edition = "2018" +name = "micro-timer" +version = "0.1.2" +authors = ["Raphaël Gomès "] +description = "Dumb tiny logging timer" +homepage = "https://heptapod.octobus.net/Alphare/micro-timer" +readme = "README.md" +license-file = "LICENCE" +repository = "https://heptapod.octobus.net/Alphare/micro-timer" + +[lib] +proc-macro = true +[dependencies.proc-macro2] +version = "1.0.9" + +[dependencies.quote] +version = "1.0.2" + +[dependencies.syn] +version = "1.0.16" +features = ["full", "extra-traits"] +[dev-dependencies.log] +version = "0.4.8" + +[dev-dependencies.pretty_assertions] +version = "0.6.1" +''' > micro-timer-0.1.2/Cargo.toml + +# micro-timer-0.2.0/Cargo.toml +echo -e '''[package] +edition = "2018" +name = "micro-timer" +version = "0.2.0" +authors = ["Raphaël Gomès "] +description = "Dumb tiny logging timer" +homepage = "https://heptapod.octobus.net/Alphare/micro-timer" +readme = "README.md" +license-file = "LICENCE" +repository = "https://heptapod.octobus.net/Alphare/micro-timer" +[dependencies.micro-timer-macros] +version = "0.2.0" + +[dependencies.scopeguard] +version = "1.1.0" +[dev-dependencies.log] +version = "0.4.8" +''' > micro-timer-0.2.0/Cargo.toml + +# micro-timer-0.2.1/Cargo.toml +echo -e '''[package] +edition = "2018" +name = "micro-timer" +version = "0.2.1" +authors = ["Raphaël Gomès "] +description = "Dumb tiny logging timer" +homepage = "https://heptapod.octobus.net/Alphare/micro-timer" +readme = "README.md" +license-file = "LICENCE" +repository = "https://heptapod.octobus.net/Alphare/micro-timer" +[dependencies.micro-timer-macros] +version = "0.2.0" + +[dependencies.scopeguard] +version = "1.1.0" +[dev-dependencies.log] +version = "0.4.8" +''' > micro-timer-0.2.1/Cargo.toml + +# micro-timer-0.3.0/Cargo.toml +echo -e '''[package] +edition = "2018" +name = "micro-timer" +version = "0.3.0" +authors = ["Raphaël Gomès "] +description = "Dumb tiny logging timer" +homepage = "https://heptapod.octobus.net/Alphare/micro-timer" +readme = "README.md" +license-file = "LICENCE" +repository = "https://heptapod.octobus.net/Alphare/micro-timer" +[dependencies.micro-timer-macros] +version = "0.3.0" + +[dependencies.scopeguard] +version = "1.1.0" +[dev-dependencies.log] +version = "0.4.8" +''' > micro-timer-0.3.0/Cargo.toml + +# micro-timer-0.3.1/Cargo.toml +echo -e '''[package] +edition = "2018" +name = "micro-timer" +version = "0.3.1" +authors = ["Raphaël Gomès "] +description = "Dumb tiny logging timer" +homepage = "https://foss.heptapod.net/octobus/rust/micro-timer" +readme = "README.md" +license-file = "LICENCE" +repository = "https://foss.heptapod.net/octobus/rust/micro-timer" +[dependencies.micro-timer-macros] +version = "0.3.1" + +[dependencies.scopeguard] +version = "1.1.0" +[dev-dependencies.log] +version = "0.4.8" +''' > micro-timer-0.3.1/Cargo.toml + +# micro-timer-0.4.0/Cargo.toml +echo -e '''[package] +edition = "2018" +name = "micro-timer" +version = "0.4.0" +authors = ["Raphaël Gomès "] +description = "Dumb tiny logging timer" +homepage = "https://foss.heptapod.net/octobus/rust/micro-timer" +readme = "README.md" +license-file = "LICENCE" +repository = "https://foss.heptapod.net/octobus/rust/micro-timer" +[dependencies.micro-timer-macros] +version = "0.4.0" + +[dependencies.scopeguard] +version = "1.1.0" +[dev-dependencies.log] +version = "0.4.8" +''' > micro-timer-0.4.0/Cargo.toml + +# .crate file are tar.gz archive +tar -czf hg-core-0.0.1.crate hg-core-0.0.1/ +tar -czf micro-timer-0.1.0.crate micro-timer-0.1.0/ +tar -czf micro-timer-0.1.1.crate micro-timer-0.1.1/ +tar -czf micro-timer-0.1.2.crate micro-timer-0.1.2/ +tar -czf micro-timer-0.2.0.crate micro-timer-0.2.0/ +tar -czf micro-timer-0.2.1.crate micro-timer-0.2.1/ +tar -czf micro-timer-0.3.0.crate micro-timer-0.3.0/ +tar -czf micro-timer-0.3.1.crate micro-timer-0.3.1/ +tar -czf micro-timer-0.4.0.crate micro-timer-0.4.0/ + +# Copy and rename .crate file for usage with 'requests_mock_datadir' +# See : https://docs.softwareheritage.org/devel/apidoc/swh.core.pytest_plugin.html#swh.core.pytest_plugin.requests_mock_datadir +mkdir ../../https_static.crates.io + +cp hg-core-0.0.1.crate ../../https_static.crates.io/crates_hg-core_hg-core-0.0.1.crate +cp micro-timer-0.1.0.crate ../../https_static.crates.io/crates_micro-timer_micro-timer-0.1.0.crate +cp micro-timer-0.1.1.crate ../../https_static.crates.io/crates_micro-timer_micro-timer-0.1.1.crate +cp micro-timer-0.1.2.crate ../../https_static.crates.io/crates_micro-timer_micro-timer-0.1.2.crate +cp micro-timer-0.2.0.crate ../../https_static.crates.io/crates_micro-timer_micro-timer-0.2.0.crate +cp micro-timer-0.2.1.crate ../../https_static.crates.io/crates_micro-timer_micro-timer-0.2.1.crate +cp micro-timer-0.3.0.crate ../../https_static.crates.io/crates_micro-timer_micro-timer-0.3.0.crate +cp micro-timer-0.3.1.crate ../../https_static.crates.io/crates_micro-timer_micro-timer-0.3.1.crate +cp micro-timer-0.4.0.crate ../../https_static.crates.io/crates_micro-timer_micro-timer-0.4.0.crate + +# Creates some http file response for test purposes. +mkdir ../../https_crates.io + +# hg-core, https://crates.io/api/v1/crates/hg-core +echo -e '''{"categories":[],"crate":{"badges":[],"categories":[],"created_at":"2019-04-16T18:48:11.404457+00:00","description":"Mercurial pure Rust core library, with no assumption on Python bindings (FFI)","documentation":null,"downloads":442,"exact_match":false,"homepage":"https://mercurial-scm.org","id":"hg-core","keywords":[],"links":{"owner_team":"/api/v1/crates/hg-core/owner_team","owner_user":"/api/v1/crates/hg-core/owner_user","owners":"/api/v1/crates/hg-core/owners","reverse_dependencies":"/api/v1/crates/hg-core/reverse_dependencies","version_downloads":"/api/v1/crates/hg-core/downloads","versions":null},"max_stable_version":"0.0.1","max_version":"0.0.1","name":"hg-core","newest_version":"0.0.1","recent_downloads":40,"repository":"https://www.mercurial-scm.org/repo/hg","updated_at":"2019-04-16T18:48:11.404457+00:00","versions":[145309]},"keywords":[],"versions":[{"audit_actions":[],"crate":"hg-core","crate_size":21344,"created_at":"2019-04-16T18:48:11.404457+00:00","dl_path":"/api/v1/crates/hg-core/0.0.1/download","downloads":442,"features":{},"id":145309,"license":"GPL-2.0-or-later","links":{"authors":"/api/v1/crates/hg-core/0.0.1/authors","dependencies":"/api/v1/crates/hg-core/0.0.1/dependencies","version_downloads":"/api/v1/crates/hg-core/0.0.1/downloads"},"num":"0.0.1","published_by":{"avatar":"https://avatars0.githubusercontent.com/u/474220?v=4","id":45544,"login":"gracinet","name":"Georges Racinet","url":"https://github.com/gracinet"},"readme_path":"/api/v1/crates/hg-core/0.0.1/readme","updated_at":"2019-04-16T18:48:11.404457+00:00","yanked":false}]} +''' > ../../https_crates.io/api_v1_crates_hg-core + +# micro-timer, https://crates.io/api/v1/crates/micro-timer +echo -e '''{"categories":[],"crate":{"badges":[],"categories":[],"created_at":"2020-02-27T14:31:49.131258+00:00","description":"Dumb tiny logging timer","documentation":null,"downloads":44245,"exact_match":false,"homepage":"https://foss.heptapod.net/octobus/rust/micro-timer","id":"micro-timer","keywords":[],"links":{"owner_team":"/api/v1/crates/micro-timer/owner_team","owner_user":"/api/v1/crates/micro-timer/owner_user","owners":"/api/v1/crates/micro-timer/owners","reverse_dependencies":"/api/v1/crates/micro-timer/reverse_dependencies","version_downloads":"/api/v1/crates/micro-timer/downloads","versions":null},"max_stable_version":"0.4.0","max_version":"0.4.0","name":"micro-timer","newest_version":"0.4.0","recent_downloads":3910,"repository":"https://foss.heptapod.net/octobus/rust/micro-timer","updated_at":"2020-09-28T13:40:49.593030+00:00","versions":[288167,254896,248120,223660,223652,216405,216156,216139]},"keywords":[],"versions":[{"audit_actions":[{"action":"publish","time":"2020-09-28T13:40:49.593030+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":3513,"created_at":"2020-09-28T13:40:49.593030+00:00","dl_path":"/api/v1/crates/micro-timer/0.4.0/download","downloads":337,"features":{},"id":288167,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.4.0/authors","dependencies":"/api/v1/crates/micro-timer/0.4.0/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.4.0/downloads"},"num":"0.4.0","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.4.0/readme","updated_at":"2020-09-28T13:40:49.593030+00:00","yanked":false},{"audit_actions":[{"action":"publish","time":"2020-06-22T16:40:06.754009+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":3357,"created_at":"2020-06-22T16:40:06.754009+00:00","dl_path":"/api/v1/crates/micro-timer/0.3.1/download","downloads":37853,"features":{},"id":254896,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.3.1/authors","dependencies":"/api/v1/crates/micro-timer/0.3.1/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.3.1/downloads"},"num":"0.3.1","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.3.1/readme","updated_at":"2020-06-22T16:40:06.754009+00:00","yanked":false},{"audit_actions":[{"action":"publish","time":"2020-06-02T11:38:33.047581+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":3306,"created_at":"2020-06-02T11:38:33.047581+00:00","dl_path":"/api/v1/crates/micro-timer/0.3.0/download","downloads":4163,"features":{},"id":248120,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.3.0/authors","dependencies":"/api/v1/crates/micro-timer/0.3.0/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.3.0/downloads"},"num":"0.3.0","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.3.0/readme","updated_at":"2020-06-02T11:38:33.047581+00:00","yanked":false},{"audit_actions":[{"action":"publish","time":"2020-03-23T11:22:26.288804+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":2937,"created_at":"2020-03-23T11:22:26.288804+00:00","dl_path":"/api/v1/crates/micro-timer/0.2.1/download","downloads":1301,"features":{},"id":223660,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.2.1/authors","dependencies":"/api/v1/crates/micro-timer/0.2.1/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.2.1/downloads"},"num":"0.2.1","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.2.1/readme","updated_at":"2020-03-23T11:22:26.288804+00:00","yanked":false},{"audit_actions":[{"action":"publish","time":"2020-03-23T10:57:04.418462+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":2941,"created_at":"2020-03-23T10:57:04.418462+00:00","dl_path":"/api/v1/crates/micro-timer/0.2.0/download","downloads":104,"features":{},"id":223652,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.2.0/authors","dependencies":"/api/v1/crates/micro-timer/0.2.0/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.2.0/downloads"},"num":"0.2.0","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.2.0/readme","updated_at":"2020-03-23T10:57:04.418462+00:00","yanked":false},{"audit_actions":[{"action":"publish","time":"2020-02-27T23:35:41.872176+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":4927,"created_at":"2020-02-27T23:35:41.872176+00:00","dl_path":"/api/v1/crates/micro-timer/0.1.2/download","downloads":258,"features":{},"id":216405,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.1.2/authors","dependencies":"/api/v1/crates/micro-timer/0.1.2/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.1.2/downloads"},"num":"0.1.2","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.1.2/readme","updated_at":"2020-02-27T23:35:41.872176+00:00","yanked":false},{"audit_actions":[{"action":"publish","time":"2020-02-27T15:17:53.486346+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":2916,"created_at":"2020-02-27T15:17:53.486346+00:00","dl_path":"/api/v1/crates/micro-timer/0.1.1/download","downloads":111,"features":{},"id":216156,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.1.1/authors","dependencies":"/api/v1/crates/micro-timer/0.1.1/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.1.1/downloads"},"num":"0.1.1","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.1.1/readme","updated_at":"2020-02-27T15:17:53.486346+00:00","yanked":false},{"audit_actions":[{"action":"publish","time":"2020-02-27T14:31:49.131258+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":2902,"created_at":"2020-02-27T14:31:49.131258+00:00","dl_path":"/api/v1/crates/micro-timer/0.1.0/download","downloads":118,"features":{},"id":216139,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.1.0/authors","dependencies":"/api/v1/crates/micro-timer/0.1.0/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.1.0/downloads"},"num":"0.1.0","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.1.0/readme","updated_at":"2020-02-27T14:31:49.131258+00:00","yanked":false}]} +''' > ../../https_crates.io/api_v1_crates_micro-timer + +# Clean up removing tmp_dir +cd ../../ +rm -r tmp_dir/ diff --git a/swh/loader/package/crates/tests/data/https_crates.io/api_v1_crates_hg-core b/swh/loader/package/crates/tests/data/https_crates.io/api_v1_crates_hg-core new file mode 100644 index 0000000..c76874d --- /dev/null +++ b/swh/loader/package/crates/tests/data/https_crates.io/api_v1_crates_hg-core @@ -0,0 +1,2 @@ +{"categories":[],"crate":{"badges":[],"categories":[],"created_at":"2019-04-16T18:48:11.404457+00:00","description":"Mercurial pure Rust core library, with no assumption on Python bindings (FFI)","documentation":null,"downloads":442,"exact_match":false,"homepage":"https://mercurial-scm.org","id":"hg-core","keywords":[],"links":{"owner_team":"/api/v1/crates/hg-core/owner_team","owner_user":"/api/v1/crates/hg-core/owner_user","owners":"/api/v1/crates/hg-core/owners","reverse_dependencies":"/api/v1/crates/hg-core/reverse_dependencies","version_downloads":"/api/v1/crates/hg-core/downloads","versions":null},"max_stable_version":"0.0.1","max_version":"0.0.1","name":"hg-core","newest_version":"0.0.1","recent_downloads":40,"repository":"https://www.mercurial-scm.org/repo/hg","updated_at":"2019-04-16T18:48:11.404457+00:00","versions":[145309]},"keywords":[],"versions":[{"audit_actions":[],"crate":"hg-core","crate_size":21344,"created_at":"2019-04-16T18:48:11.404457+00:00","dl_path":"/api/v1/crates/hg-core/0.0.1/download","downloads":442,"features":{},"id":145309,"license":"GPL-2.0-or-later","links":{"authors":"/api/v1/crates/hg-core/0.0.1/authors","dependencies":"/api/v1/crates/hg-core/0.0.1/dependencies","version_downloads":"/api/v1/crates/hg-core/0.0.1/downloads"},"num":"0.0.1","published_by":{"avatar":"https://avatars0.githubusercontent.com/u/474220?v=4","id":45544,"login":"gracinet","name":"Georges Racinet","url":"https://github.com/gracinet"},"readme_path":"/api/v1/crates/hg-core/0.0.1/readme","updated_at":"2019-04-16T18:48:11.404457+00:00","yanked":false}]} + diff --git a/swh/loader/package/crates/tests/data/https_crates.io/api_v1_crates_micro-timer b/swh/loader/package/crates/tests/data/https_crates.io/api_v1_crates_micro-timer new file mode 100644 index 0000000..e6878b6 --- /dev/null +++ b/swh/loader/package/crates/tests/data/https_crates.io/api_v1_crates_micro-timer @@ -0,0 +1,2 @@ +{"categories":[],"crate":{"badges":[],"categories":[],"created_at":"2020-02-27T14:31:49.131258+00:00","description":"Dumb tiny logging timer","documentation":null,"downloads":44245,"exact_match":false,"homepage":"https://foss.heptapod.net/octobus/rust/micro-timer","id":"micro-timer","keywords":[],"links":{"owner_team":"/api/v1/crates/micro-timer/owner_team","owner_user":"/api/v1/crates/micro-timer/owner_user","owners":"/api/v1/crates/micro-timer/owners","reverse_dependencies":"/api/v1/crates/micro-timer/reverse_dependencies","version_downloads":"/api/v1/crates/micro-timer/downloads","versions":null},"max_stable_version":"0.4.0","max_version":"0.4.0","name":"micro-timer","newest_version":"0.4.0","recent_downloads":3910,"repository":"https://foss.heptapod.net/octobus/rust/micro-timer","updated_at":"2020-09-28T13:40:49.593030+00:00","versions":[288167,254896,248120,223660,223652,216405,216156,216139]},"keywords":[],"versions":[{"audit_actions":[{"action":"publish","time":"2020-09-28T13:40:49.593030+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":3513,"created_at":"2020-09-28T13:40:49.593030+00:00","dl_path":"/api/v1/crates/micro-timer/0.4.0/download","downloads":337,"features":{},"id":288167,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.4.0/authors","dependencies":"/api/v1/crates/micro-timer/0.4.0/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.4.0/downloads"},"num":"0.4.0","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.4.0/readme","updated_at":"2020-09-28T13:40:49.593030+00:00","yanked":false},{"audit_actions":[{"action":"publish","time":"2020-06-22T16:40:06.754009+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":3357,"created_at":"2020-06-22T16:40:06.754009+00:00","dl_path":"/api/v1/crates/micro-timer/0.3.1/download","downloads":37853,"features":{},"id":254896,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.3.1/authors","dependencies":"/api/v1/crates/micro-timer/0.3.1/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.3.1/downloads"},"num":"0.3.1","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.3.1/readme","updated_at":"2020-06-22T16:40:06.754009+00:00","yanked":false},{"audit_actions":[{"action":"publish","time":"2020-06-02T11:38:33.047581+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":3306,"created_at":"2020-06-02T11:38:33.047581+00:00","dl_path":"/api/v1/crates/micro-timer/0.3.0/download","downloads":4163,"features":{},"id":248120,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.3.0/authors","dependencies":"/api/v1/crates/micro-timer/0.3.0/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.3.0/downloads"},"num":"0.3.0","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.3.0/readme","updated_at":"2020-06-02T11:38:33.047581+00:00","yanked":false},{"audit_actions":[{"action":"publish","time":"2020-03-23T11:22:26.288804+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":2937,"created_at":"2020-03-23T11:22:26.288804+00:00","dl_path":"/api/v1/crates/micro-timer/0.2.1/download","downloads":1301,"features":{},"id":223660,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.2.1/authors","dependencies":"/api/v1/crates/micro-timer/0.2.1/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.2.1/downloads"},"num":"0.2.1","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.2.1/readme","updated_at":"2020-03-23T11:22:26.288804+00:00","yanked":false},{"audit_actions":[{"action":"publish","time":"2020-03-23T10:57:04.418462+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":2941,"created_at":"2020-03-23T10:57:04.418462+00:00","dl_path":"/api/v1/crates/micro-timer/0.2.0/download","downloads":104,"features":{},"id":223652,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.2.0/authors","dependencies":"/api/v1/crates/micro-timer/0.2.0/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.2.0/downloads"},"num":"0.2.0","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.2.0/readme","updated_at":"2020-03-23T10:57:04.418462+00:00","yanked":false},{"audit_actions":[{"action":"publish","time":"2020-02-27T23:35:41.872176+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":4927,"created_at":"2020-02-27T23:35:41.872176+00:00","dl_path":"/api/v1/crates/micro-timer/0.1.2/download","downloads":258,"features":{},"id":216405,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.1.2/authors","dependencies":"/api/v1/crates/micro-timer/0.1.2/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.1.2/downloads"},"num":"0.1.2","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.1.2/readme","updated_at":"2020-02-27T23:35:41.872176+00:00","yanked":false},{"audit_actions":[{"action":"publish","time":"2020-02-27T15:17:53.486346+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":2916,"created_at":"2020-02-27T15:17:53.486346+00:00","dl_path":"/api/v1/crates/micro-timer/0.1.1/download","downloads":111,"features":{},"id":216156,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.1.1/authors","dependencies":"/api/v1/crates/micro-timer/0.1.1/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.1.1/downloads"},"num":"0.1.1","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.1.1/readme","updated_at":"2020-02-27T15:17:53.486346+00:00","yanked":false},{"audit_actions":[{"action":"publish","time":"2020-02-27T14:31:49.131258+00:00","user":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"}}],"crate":"micro-timer","crate_size":2902,"created_at":"2020-02-27T14:31:49.131258+00:00","dl_path":"/api/v1/crates/micro-timer/0.1.0/download","downloads":118,"features":{},"id":216139,"license":"non-standard","links":{"authors":"/api/v1/crates/micro-timer/0.1.0/authors","dependencies":"/api/v1/crates/micro-timer/0.1.0/dependencies","version_downloads":"/api/v1/crates/micro-timer/0.1.0/downloads"},"num":"0.1.0","published_by":{"avatar":"https://avatars.githubusercontent.com/u/9445758?v=4","id":79957,"login":"Alphare","name":"Raphaël Gomès","url":"https://github.com/Alphare"},"readme_path":"/api/v1/crates/micro-timer/0.1.0/readme","updated_at":"2020-02-27T14:31:49.131258+00:00","yanked":false}]} + diff --git a/swh/loader/package/crates/tests/data/https_static.crates.io/crates_hg-core_hg-core-0.0.1.crate b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_hg-core_hg-core-0.0.1.crate new file mode 100644 index 0000000..e51e437 Binary files /dev/null and b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_hg-core_hg-core-0.0.1.crate differ diff --git a/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.1.0.crate b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.1.0.crate new file mode 100644 index 0000000..dffc2fc Binary files /dev/null and b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.1.0.crate differ diff --git a/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.1.1.crate b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.1.1.crate new file mode 100644 index 0000000..6cc3eb6 Binary files /dev/null and b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.1.1.crate differ diff --git a/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.1.2.crate b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.1.2.crate new file mode 100644 index 0000000..3a06370 Binary files /dev/null and b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.1.2.crate differ diff --git a/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.2.0.crate b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.2.0.crate new file mode 100644 index 0000000..c304d04 Binary files /dev/null and b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.2.0.crate differ diff --git a/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.2.1.crate b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.2.1.crate new file mode 100644 index 0000000..3d844f0 Binary files /dev/null and b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.2.1.crate differ diff --git a/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.3.0.crate b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.3.0.crate new file mode 100644 index 0000000..ff91227 Binary files /dev/null and b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.3.0.crate differ diff --git a/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.3.1.crate b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.3.1.crate new file mode 100644 index 0000000..fcbe142 Binary files /dev/null and b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.3.1.crate differ diff --git a/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.4.0.crate b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.4.0.crate new file mode 100644 index 0000000..1b50fa5 Binary files /dev/null and b/swh/loader/package/crates/tests/data/https_static.crates.io/crates_micro-timer_micro-timer-0.4.0.crate differ diff --git a/swh/loader/package/crates/tests/test_crates.py b/swh/loader/package/crates/tests/test_crates.py new file mode 100644 index 0000000..e577462 --- /dev/null +++ b/swh/loader/package/crates/tests/test_crates.py @@ -0,0 +1,214 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information +from swh.loader.package.crates.loader import CratesLoader +from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats +from swh.model.hashutil import hash_to_bytes +from swh.model.model import ( + ObjectType, + Person, + Release, + Snapshot, + SnapshotBranch, + TargetType, + TimestampWithTimezone, +) + +CRATES_EXTRA = [ + { + "name": "hg-core", + "version": "0.0.1", + "url": "https://static.crates.io/crates/hg-core/hg-core-0.0.1.crate", + "checksum": "7fe168efadebadb9da6a329fdc027036e233b662285730cad27220e11e53c384", + }, + { + "name": "micro-timer", + "version": "0.4.0", + "url": "https://static.crates.io/crates/micro-timer/micro-timer-0.4.0.crate", + "checksum": "5de32cb59a062672560d6f0842c4aa7714727457b9fe2daf8987d995a176a405", + }, +] + + +def test_get_versions(requests_mock_datadir, swh_storage): + loader = CratesLoader( + swh_storage, + url=CRATES_EXTRA[1]["url"], + package_name=CRATES_EXTRA[1]["name"], + version=CRATES_EXTRA[1]["version"], + ) + assert loader.get_versions() == [ + "0.1.0", + "0.1.1", + "0.1.2", + "0.2.0", + "0.2.1", + "0.3.0", + "0.3.1", + "0.4.0", + ] + + +def test_get_default_version(requests_mock_datadir, swh_storage): + loader = CratesLoader( + swh_storage, + url=CRATES_EXTRA[1]["url"], + package_name=CRATES_EXTRA[1]["name"], + version=CRATES_EXTRA[1]["version"], + ) + assert loader.get_default_version() == "0.4.0" + + +def test_crate_origin_not_found(swh_storage, requests_mock_datadir): + url = "https://nowhere-to-run/nowhere-to-hide-0.0.1.crate" + loader = CratesLoader( + swh_storage, + url, + package_name="nowhere-to-hide", + version="0.0.1", + ) + + assert loader.load() == {"status": "failed"} + + assert_last_visit_matches( + swh_storage, url, status="not_found", type="crates", snapshot=None + ) + + +def test_crates_loader_load_one_version(datadir, requests_mock_datadir, swh_storage): + loader = CratesLoader( + swh_storage, + url=CRATES_EXTRA[0]["url"], + package_name=CRATES_EXTRA[0]["name"], + version=CRATES_EXTRA[0]["version"], + ) + actual_load_status = loader.load() + assert actual_load_status["status"] == "eventful" + assert actual_load_status["snapshot_id"] is not None + + expected_snapshot_id = "353cd6858c88ee8210432ea1098993c2e9966561" + expected_release_id = "d578833534017430f1b93eb741620899620c2505" + + assert expected_snapshot_id == actual_load_status["snapshot_id"] + + expected_snapshot = Snapshot( + id=hash_to_bytes(actual_load_status["snapshot_id"]), + branches={ + b"releases/0.0.1/hg-core-0.0.1.crate": SnapshotBranch( + target=hash_to_bytes(expected_release_id), + target_type=TargetType.RELEASE, + ), + b"HEAD": SnapshotBranch( + target=b"releases/0.0.1/hg-core-0.0.1.crate", + target_type=TargetType.ALIAS, + ), + }, + ) + check_snapshot(expected_snapshot, swh_storage) + + stats = get_stats(swh_storage) + assert { + "content": 1, + "directory": 2, + "origin": 1, + "origin_visit": 1, + "release": 1, + "revision": 0, + "skipped_content": 0, + "snapshot": 1, + } == stats + + assert swh_storage.release_get([hash_to_bytes(expected_release_id)])[0] == Release( + name=b"0.0.1", + message=b"Synthetic release for Crate source package hg-core version " + b"0.0.1\nMercurial pure Rust core library, with no assumption " + b"on Python bindings (FFI)\n", + target=hash_to_bytes("674c3b0b54628d55b93a79dc7adf304efc01b371"), + target_type=ObjectType.DIRECTORY, + synthetic=True, + author=Person.from_fullname(b"Georges Racinet "), + date=TimestampWithTimezone.from_iso8601("2019-04-16T18:48:11.404457+00:00"), + id=hash_to_bytes(expected_release_id), + ) + + +def test_crates_loader_load_n_versions(datadir, requests_mock_datadir, swh_storage): + url = CRATES_EXTRA[1]["url"] + loader = CratesLoader( + swh_storage, + url=url, + package_name=CRATES_EXTRA[1]["name"], + version=CRATES_EXTRA[1]["version"], + checksum=CRATES_EXTRA[1]["checksum"], + ) + actual_load_status = loader.load() + assert actual_load_status["status"] == "eventful" + assert actual_load_status["snapshot_id"] is not None + + expected_snapshot_id = "016cbbe3bb78424c35b898015a2d80d79359e2ad" + assert expected_snapshot_id == actual_load_status["snapshot_id"] + + expected_snapshot = Snapshot( + id=hash_to_bytes(expected_snapshot_id), + branches={ + b"releases/0.4.0/micro-timer-0.4.0.crate": SnapshotBranch( + target=hash_to_bytes("3237c1174c4ccfa8e934d1bfd8d80b3a89760e39"), + target_type=TargetType.RELEASE, + ), + b"releases/0.3.1/micro-timer-0.3.1.crate": SnapshotBranch( + target=hash_to_bytes("8b727a280051cdb90468ede2746e176e6fdf355f"), + target_type=TargetType.RELEASE, + ), + b"releases/0.3.0/micro-timer-0.3.0.crate": SnapshotBranch( + target=hash_to_bytes("f45ec236ae50fb37e924a3d2cc093e72b6cbf1cd"), + target_type=TargetType.RELEASE, + ), + b"releases/0.2.1/micro-timer-0.2.1.crate": SnapshotBranch( + target=hash_to_bytes("50a60a2c3696df7cd1b623bd7dbea2c89b994e42"), + target_type=TargetType.RELEASE, + ), + b"releases/0.2.0/micro-timer-0.2.0.crate": SnapshotBranch( + target=hash_to_bytes("f0592dc0ae05399d872017d0260c45b875cb590e"), + target_type=TargetType.RELEASE, + ), + b"releases/0.1.2/micro-timer-0.1.2.crate": SnapshotBranch( + target=hash_to_bytes("9220d7823fc40ab44e3ae3227522e7de672fad3e"), + target_type=TargetType.RELEASE, + ), + b"releases/0.1.1/micro-timer-0.1.1.crate": SnapshotBranch( + target=hash_to_bytes("38529b7e355f79fdce31a3ba891e146174e10237"), + target_type=TargetType.RELEASE, + ), + b"releases/0.1.0/micro-timer-0.1.0.crate": SnapshotBranch( + target=hash_to_bytes("5e5e6120af55b65c577e09331df54e70fad5e8b0"), + target_type=TargetType.RELEASE, + ), + b"HEAD": SnapshotBranch( + target=b"releases/0.4.0/micro-timer-0.4.0.crate", + target_type=TargetType.ALIAS, + ), + }, + ) + + check_snapshot(expected_snapshot, swh_storage) + + stats = get_stats(swh_storage) + assert { + "content": 8, + "directory": 16, + "origin": 1, + "origin_visit": 1, + "release": 8, + "revision": 0, + "skipped_content": 0, + "snapshot": 1, + } == stats + + assert_last_visit_matches( + swh_storage, + url, + status="full", + type="crates", + snapshot=expected_snapshot.id, + ) diff --git a/swh/loader/package/cran/tests/test_tasks.py b/swh/loader/package/crates/tests/test_tasks.py similarity index 59% copy from swh/loader/package/cran/tests/test_tasks.py copy to swh/loader/package/crates/tests/test_tasks.py index ae8a604..b38412f 100644 --- a/swh/loader/package/cran/tests/test_tasks.py +++ b/swh/loader/package/crates/tests/test_tasks.py @@ -1,24 +1,25 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -def test_tasks_cran_loader( +def test_tasks_crates_loader( mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config ): - mock_load = mocker.patch("swh.loader.package.cran.loader.CRANLoader.load") + mock_load = mocker.patch("swh.loader.package.crates.loader.CratesLoader.load") mock_load.return_value = {"status": "eventful"} res = swh_scheduler_celery_app.send_task( - "swh.loader.package.cran.tasks.LoadCRAN", + "swh.loader.package.crates.tasks.LoadCrates", kwargs=dict( - url="some-url", - artifacts=[{"version": "1.2.3", "url": "artifact-url"}], + url="some-url/crates/some-package/some-package-0.0.1.crate", + package_name="some-package", + version="0.0.1", ), ) assert res res.wait() assert res.successful() assert mock_load.called assert res.result == {"status": "eventful"} diff --git a/swh/loader/package/debian/tasks.py b/swh/loader/package/debian/tasks.py index c821ea3..38d582b 100644 --- a/swh/loader/package/debian/tasks.py +++ b/swh/loader/package/debian/tasks.py @@ -1,15 +1,15 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from celery import shared_task from swh.loader.package.debian.loader import DebianLoader @shared_task(name=__name__ + ".LoadDebian") -def load_deb(*, url, packages): +def load_deb(**kwargs): """Load Debian package""" - loader = DebianLoader.from_configfile(url=url, packages=packages) + loader = DebianLoader.from_configfile(**kwargs) return loader.load() diff --git a/swh/loader/package/debian/tests/test_tasks.py b/swh/loader/package/debian/tests/test_tasks.py index 048953d..f55979f 100644 --- a/swh/loader/package/debian/tests/test_tasks.py +++ b/swh/loader/package/debian/tests/test_tasks.py @@ -1,21 +1,65 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import uuid -def test_tasks_debian_loader( - mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config -): +import pytest + +from swh.scheduler.model import ListedOrigin, Lister +from swh.scheduler.utils import create_origin_task_dict + + +@pytest.fixture(autouse=True) +def celery_worker_and_swh_config(swh_scheduler_celery_worker, swh_config): + pass + + +@pytest.fixture +def debian_lister(): + return Lister(name="debian-lister", instance_name="example", id=uuid.uuid4()) + + +@pytest.fixture +def debian_listed_origin(debian_lister): + return ListedOrigin( + lister_id=debian_lister.id, + url="https://debian.example.org/package", + visit_type="debian", + extra_loader_arguments={"packages": {}}, + ) + + +def test_tasks_debian_loader(mocker, swh_scheduler_celery_app): mock_load = mocker.patch("swh.loader.package.debian.loader.DebianLoader.load") mock_load.return_value = {"status": "eventful"} res = swh_scheduler_celery_app.send_task( "swh.loader.package.debian.tasks.LoadDebian", kwargs=dict(url="some-url", packages={}), ) assert res res.wait() assert res.successful() assert mock_load.called assert res.result == {"status": "eventful"} + + +def test_tasks_debian_loader_for_listed_origin( + mocker, swh_scheduler_celery_app, debian_lister, debian_listed_origin +): + mock_load = mocker.patch("swh.loader.package.debian.loader.DebianLoader.load") + mock_load.return_value = {"status": "eventful"} + + task_dict = create_origin_task_dict(debian_listed_origin, debian_lister) + + res = swh_scheduler_celery_app.send_task( + "swh.loader.package.debian.tasks.LoadDebian", + kwargs=task_dict["arguments"]["kwargs"], + ) + assert res + res.wait() + assert res.successful() + assert mock_load.called + assert res.result == {"status": "eventful"} diff --git a/swh/loader/package/deposit/tasks.py b/swh/loader/package/deposit/tasks.py index 187651b..6372ff2 100644 --- a/swh/loader/package/deposit/tasks.py +++ b/swh/loader/package/deposit/tasks.py @@ -1,14 +1,14 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from celery import shared_task from swh.loader.package.deposit.loader import DepositLoader @shared_task(name=__name__ + ".LoadDeposit") -def load_deposit(*, url, deposit_id): +def load_deposit(**kwargs): """Load Deposit artifacts""" - return DepositLoader.from_configfile(url=url, deposit_id=deposit_id).load() + return DepositLoader.from_configfile(**kwargs).load() diff --git a/swh/loader/package/deposit/tests/test_tasks.py b/swh/loader/package/deposit/tests/test_tasks.py index cd63efd..1d82756 100644 --- a/swh/loader/package/deposit/tests/test_tasks.py +++ b/swh/loader/package/deposit/tests/test_tasks.py @@ -1,27 +1,80 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import uuid + +import pytest + +from swh.scheduler.model import ListedOrigin, Lister +from swh.scheduler.utils import create_origin_task_dict + + +@pytest.fixture(autouse=True) +def celery_worker_and_swh_config(swh_scheduler_celery_worker, swh_config): + pass + + +@pytest.fixture +def deposit_lister(): + return Lister(name="deposit-lister", instance_name="example", id=uuid.uuid4()) + + +@pytest.fixture +def deposit_listed_origin(deposit_lister): + return ListedOrigin( + lister_id=deposit_lister.id, + url="https://example.org/project", + visit_type="deposit", + extra_loader_arguments={"deposit_id": "some-d-id"}, + ) + def test_tasks_deposit_loader( - mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config + mocker, + swh_scheduler_celery_app, ): mock_loader = mocker.patch( "swh.loader.package.deposit.loader.DepositLoader.from_configfile" ) mock_loader.return_value = mock_loader mock_loader.load.return_value = {"status": "eventful"} res = swh_scheduler_celery_app.send_task( "swh.loader.package.deposit.tasks.LoadDeposit", kwargs=dict( url="some-url", deposit_id="some-d-id", ), ) assert res res.wait() assert res.successful() assert mock_loader.called assert res.result == {"status": "eventful"} + + +def test_tasks_deposit_loader_for_listed_origin( + mocker, + swh_scheduler_celery_app, + deposit_lister, + deposit_listed_origin, +): + mock_loader = mocker.patch( + "swh.loader.package.deposit.loader.DepositLoader.from_configfile" + ) + mock_loader.return_value = mock_loader + mock_loader.load.return_value = {"status": "eventful"} + + task_dict = create_origin_task_dict(deposit_listed_origin, deposit_lister) + + res = swh_scheduler_celery_app.send_task( + "swh.loader.package.deposit.tasks.LoadDeposit", + kwargs=task_dict["arguments"]["kwargs"], + ) + assert res + res.wait() + assert res.successful() + assert mock_loader.called + assert res.result == {"status": "eventful"} diff --git a/swh/loader/package/maven/loader.py b/swh/loader/package/maven/loader.py index d664e67..e2d9d05 100644 --- a/swh/loader/package/maven/loader.py +++ b/swh/loader/package/maven/loader.py @@ -1,204 +1,206 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from __future__ import annotations + from datetime import datetime, timezone import json import logging from os import path import string from typing import Any, Iterator, List, Optional, Sequence, Tuple import attr import iso8601 import requests from typing_extensions import TypedDict from swh.loader.package.loader import ( BasePackageInfo, PackageLoader, RawExtrinsicMetadataCore, ) from swh.loader.package.utils import EMPTY_AUTHOR, release_name from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, ObjectType, RawExtrinsicMetadata, Release, Sha1Git, TimestampWithTimezone, ) from swh.storage.interface import StorageInterface logger = logging.getLogger(__name__) class ArtifactDict(TypedDict): """Data about a Maven artifact, passed by the Maven Lister.""" time: str """the time of the last update of jar file on the server as an iso8601 date string """ url: str """the artifact url to retrieve filename""" filename: Optional[str] """optionally, the file's name""" gid: str """artifact's groupId""" aid: str """artifact's artifactId""" version: str """artifact's version""" base_url: str """root URL of the Maven instance""" @attr.s class MavenPackageInfo(BasePackageInfo): time = attr.ib(type=datetime) """Timestamp of the last update of jar file on the server.""" gid = attr.ib(type=str) """Group ID of the maven artifact""" aid = attr.ib(type=str) """Artifact ID of the maven artifact""" version = attr.ib(type=str) """Version of the maven artifact""" base_url = attr.ib(type=str) """Root URL of the Maven instance""" # default format for maven artifacts MANIFEST_FORMAT = string.Template("$gid $aid $version $url $time") EXTID_TYPE = "maven-jar" EXTID_VERSION = 0 @classmethod - def from_metadata(cls, url: str, a_metadata: ArtifactDict) -> "MavenPackageInfo": + def from_metadata(cls, a_metadata: ArtifactDict) -> MavenPackageInfo: time = iso8601.parse_date(a_metadata["time"]).astimezone(tz=timezone.utc) + url = a_metadata["url"] return cls( url=url, filename=a_metadata.get("filename") or path.split(url)[-1], time=time, gid=a_metadata["gid"], aid=a_metadata["aid"], version=a_metadata["version"], base_url=a_metadata["base_url"], directory_extrinsic_metadata=[ RawExtrinsicMetadataCore( format="maven-json", metadata=json.dumps(a_metadata).encode(), ), ], ) class MavenLoader(PackageLoader[MavenPackageInfo]): """Load source code jar origin's artifact files into swh archive""" visit_type = "maven" def __init__( self, storage: StorageInterface, url: str, artifacts: Sequence[ArtifactDict], **kwargs: Any, ): """Loader constructor. For now, this is the lister's task output. There is one, and only one, artefact (jar or zip) per version, as guaranteed by the Maven coordinates system. Args: url: Origin url artifacts: List of single artifact information """ super().__init__(storage=storage, url=url, **kwargs) self.artifacts = artifacts # assume order is enforced in the lister self.version_artifact = { jar["version"]: jar for jar in artifacts if jar["version"] } if artifacts: base_urls = {jar["base_url"] for jar in artifacts} try: (self.base_url,) = base_urls except ValueError: raise ValueError( "Artifacts originate from more than one Maven instance: " + ", ".join(base_urls) ) from None else: # There is no artifact, so self.metadata_authority won't be called, # so self.base_url won't be accessed. pass def get_versions(self) -> Sequence[str]: return list(self.version_artifact) def get_default_version(self) -> str: # Default version is the last item return self.artifacts[-1]["version"] def get_metadata_authority(self): return MetadataAuthority(type=MetadataAuthorityType.FORGE, url=self.base_url) def build_extrinsic_directory_metadata( self, p_info: MavenPackageInfo, release_id: Sha1Git, directory_id: Sha1Git, ) -> List[RawExtrinsicMetadata]: # Rebuild POM URL. pom_url = path.dirname(p_info.url) pom_url = f"{pom_url}/{p_info.aid}-{p_info.version}.pom" r = requests.get(pom_url, allow_redirects=True) if r.status_code == 200: metadata_pom = r.content else: metadata_pom = b"" p_info.directory_extrinsic_metadata.append( RawExtrinsicMetadataCore( format="maven-pom", metadata=metadata_pom, ) ) return super().build_extrinsic_directory_metadata( p_info=p_info, release_id=release_id, directory_id=directory_id, ) def get_package_info(self, version: str) -> Iterator[Tuple[str, MavenPackageInfo]]: a_metadata = self.version_artifact[version] - yield release_name(a_metadata["version"]), MavenPackageInfo.from_metadata( - self.origin.url, a_metadata - ) + rel_name = release_name(a_metadata["version"]) + yield rel_name, MavenPackageInfo.from_metadata(a_metadata) def build_release( self, p_info: MavenPackageInfo, uncompressed_path: str, directory: Sha1Git ) -> Optional[Release]: msg = f"Synthetic release for archive at {p_info.url}\n".encode("utf-8") normalized_time = TimestampWithTimezone.from_datetime(p_info.time) return Release( name=p_info.version.encode(), message=msg, date=normalized_time, author=EMPTY_AUTHOR, target=directory, target_type=ObjectType.DIRECTORY, synthetic=True, ) diff --git a/swh/loader/package/maven/tasks.py b/swh/loader/package/maven/tasks.py index 49d2b0b..27fe787 100644 --- a/swh/loader/package/maven/tasks.py +++ b/swh/loader/package/maven/tasks.py @@ -1,15 +1,15 @@ # Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from celery import shared_task from swh.loader.package.maven.loader import MavenLoader @shared_task(name=__name__ + ".LoadMaven") -def load_maven(*, url=None, artifacts=None): +def load_maven(**kwargs): """Load maven jar artifacts.""" - loader = MavenLoader.from_configfile(url=url, artifacts=artifacts) + loader = MavenLoader.from_configfile(**kwargs) return loader.load() diff --git a/swh/loader/package/maven/tests/test_maven.py b/swh/loader/package/maven/tests/test_maven.py index 96b6ad6..36de2a7 100644 --- a/swh/loader/package/maven/tests/test_maven.py +++ b/swh/loader/package/maven/tests/test_maven.py @@ -1,618 +1,475 @@ # Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import hashlib +from itertools import chain import json +import os from pathlib import Path import pytest +from swh.core.tarball import uncompress from swh.loader.package import __version__ from swh.loader.package.maven.loader import MavenLoader, MavenPackageInfo from swh.loader.package.utils import EMPTY_AUTHOR from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats +from swh.model.from_disk import Directory, iter_directory from swh.model.hashutil import hash_to_bytes from swh.model.model import ( RawExtrinsicMetadata, Release, Snapshot, SnapshotBranch, TargetType, TimestampWithTimezone, ) from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher from swh.model.model import ObjectType as ModelObjectType from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType from swh.storage.algos.snapshot import snapshot_get_all_branches REPO_BASE_URL = "https://repo1.maven.org/maven2/" -MVN_ARTIFACT_URLS = [ - f"{REPO_BASE_URL}al/aldi/sprova4j/0.1.0/sprova4j-0.1.0-sources.jar", - f"{REPO_BASE_URL}al/aldi/sprova4j/0.1.1/sprova4j-0.1.1-sources.jar", -] +MVN_ORIGIN_URL = f"{REPO_BASE_URL}al/aldi/sprova4j" MVN_ARTIFACTS = [ { "time": "2021-07-12 19:06:59.335000", "gid": "al.aldi", "aid": "sprova4j", "filename": "sprova4j-0.1.0-sources.jar", "version": "0.1.0", "base_url": REPO_BASE_URL, + "url": f"{REPO_BASE_URL}al/aldi/sprova4j/0.1.0/sprova4j-0.1.0-sources.jar", }, { "time": "2021-07-12 19:37:05.534000", "gid": "al.aldi", "aid": "sprova4j", "filename": "sprova4j-0.1.1-sources.jar", "version": "0.1.1", "base_url": REPO_BASE_URL, + "url": f"{REPO_BASE_URL}al/aldi/sprova4j/0.1.1/sprova4j-0.1.1-sources.jar", }, ] MVN_ARTIFACTS_POM = [ f"{REPO_BASE_URL}al/aldi/sprova4j/0.1.0/sprova4j-0.1.0.pom", f"{REPO_BASE_URL}al/aldi/sprova4j/0.1.1/sprova4j-0.1.1.pom", ] -_expected_new_contents_first_visit = [ - "cd807364cd7730022b3849f90ccf4bababbada84", - "79e33dd52ebdf615e6696ae69add91cb990d81e2", - "8002bd514156f05a0940ae14ef86eb0179cbd510", - "23479553a6ccec30d377dee0496123a65d23fd8c", - "07ffbebb933bc1660e448f07d8196c2b083797f9", - "abf021b581f80035b56153c9aa27195b8d7ebbb8", - "eec70ba80a6862ed2619727663b17eb0d9dfe131", - "81a493dacb44dedf623f29ecf62c0e035bf698de", - "bda85ed0bbecf8cddfea04234bee16f476f64fe4", - "1ec91d561f5bdf59acb417086e04c54ead94e94e", - "d517b423da707fa21378623f35facebff53cb59d", - "3f0f21a764972d79e583908991c893c999613354", - "a2dd4d7dfe6043baf9619081e4e29966989211af", - "f62685cf0c6825a4097c949280b584cf0e16d047", - "56afc1ea60cef6548ce0a34f44e91b0e4b063835", - "cf7c740926e7ebc9ac8978a5c4f0e1e7a0e9e3af", - "86ff828bea1c22ca3d50ed82569b9c59ce2c41a1", - "1d0fa04454d9fec31d8ee3f35b58158ca1e28b15", - "e90239a2c8d9ede61a29671a8b397a743e18fa34", - "ce8851005d084aea089bcd8cf01052f4b234a823", - "2c34ce622aa7fa68d104900840f66671718e6249", - "e6a6fec32dcb3bee93c34fc11b0174a6b0b0ec6d", - "405d3e1be4b658bf26de37f2c90c597b2796b9d7", - "d0d2f5848721e04300e537826ef7d2d6d9441df0", - "399c67e33e38c475fd724d283dd340f6a2e8dc91", - "dea10c1111cc61ac1809fb7e88857e3db054959f", -] - -_expected_json_metadata = { - "time": "2021-07-12 19:06:59.335000", - "gid": "al.aldi", - "aid": "sprova4j", - "filename": "sprova4j-0.1.0-sources.jar", - "version": "0.1.0", - "base_url": REPO_BASE_URL, -} -_expected_pom_metadata = ( - """ - - 4.0.0 - al.aldi - sprova4j - 0.1.0 - sprova4j - Java client for Sprova Test Management - https://github.com/aldialimucaj/sprova4j - 2018 - - - The Apache Software License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt - repo - - - - - aldi - Aldi Alimucaj - aldi.alimucaj@gmail.com - - - - scm:git:git://github.com/aldialimucaj/sprova4j.git - scm:git:git://github.com/aldialimucaj/sprova4j.git - https://github.com/aldialimucaj/sprova4j - - - - ch.qos.logback - logback-classic - 1.2.3 - runtime - - - com.google.code.gson - gson - 2.8.3 - runtime - - - com.squareup.okhttp3 - okhttp - 3.10.0 - runtime - - - com.squareup.okio - okio - 1.0.0 - runtime - - - org.glassfish - javax.json - 1.1.2 - runtime - - - javax.json - javax.json-api - 1.1.2 - runtime - - - javax.validation - validation-api - 2.0.1.Final - runtime - - - junit - junit - 4.12 - test - - - com.squareup.okhttp3 - mockwebserver - 3.10.0 - test - - - -""" -) - -_expected_new_directories_first_visit = [ - "6c9de41e4cebb91a8368da1d89ae9873bd540ec3", - "c1a2ee97fc47426d0179f94d223405336b5cd075", - "9e1bdca292765a9528af18743bd793b80362c768", - "193a7af634592ef27fb341762806f61e8fb8eab3", - "a297aa21e3dbf138b370be3aae7a852dd403bbbb", - "da84026119ae04022f007d5b3362e98d46d09045", - "75bb915942a9c441ca62aeffc3b634f1ec9ce5e2", - "0851d359283b2ad82b116c8d1b55ab14b1ec219c", - "2bcbb8b723a025ee9a36b719cea229ed38c37e46", -] - -_expected_new_release_first_visit = "02e83c29ec094db581f939d2e238d0613a4f59ac" - -REL_MSG = ( +REL_MSGS = ( b"Synthetic release for archive at https://repo1.maven.org/maven2/al/aldi/" - b"sprova4j/0.1.0/sprova4j-0.1.0-sources.jar\n" + b"sprova4j/0.1.0/sprova4j-0.1.0-sources.jar\n", + b"Synthetic release for archive at https://repo1.maven.org/maven2/al/aldi/" + b"sprova4j/0.1.1/sprova4j-0.1.1-sources.jar\n", ) -REVISION_DATE = TimestampWithTimezone.from_datetime( - datetime.datetime(2021, 7, 12, 19, 6, 59, 335000, tzinfo=datetime.timezone.utc) +REL_DATES = ( + TimestampWithTimezone.from_datetime( + datetime.datetime(2021, 7, 12, 19, 6, 59, 335000, tzinfo=datetime.timezone.utc) + ), + TimestampWithTimezone.from_datetime( + datetime.datetime(2021, 7, 12, 19, 37, 5, 534000, tzinfo=datetime.timezone.utc) + ), ) @pytest.fixture def data_jar_1(datadir): content = Path( datadir, "https_maven.org", "sprova4j-0.1.0-sources.jar" ).read_bytes() return content @pytest.fixture def data_pom_1(datadir): content = Path(datadir, "https_maven.org", "sprova4j-0.1.0.pom").read_bytes() return content @pytest.fixture def data_jar_2(datadir): content = Path( datadir, "https_maven.org", "sprova4j-0.1.1-sources.jar" ).read_bytes() return content @pytest.fixture def data_pom_2(datadir): content = Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom").read_bytes() return content -def test_jar_visit_with_no_artifact_found(swh_storage, requests_mock_datadir): +@pytest.fixture +def jar_dirs(datadir, tmp_path): + jar_1_path = os.path.join(datadir, "https_maven.org", "sprova4j-0.1.0-sources.jar") + jar_2_path = os.path.join(datadir, "https_maven.org", "sprova4j-0.1.1-sources.jar") + + jar_1_extract_path = os.path.join(tmp_path, "jar_1") + jar_2_extract_path = os.path.join(tmp_path, "jar_2") + + uncompress(jar_1_path, jar_1_extract_path) + uncompress(jar_2_path, jar_2_extract_path) + + jar_1_dir = Directory.from_disk(path=jar_1_extract_path.encode()) + jar_2_dir = Directory.from_disk(path=jar_2_extract_path.encode()) + + return [jar_1_dir, jar_2_dir] + + +@pytest.fixture +def expected_contents_and_directories(jar_dirs): + jar_1_cnts, _, jar_1_dirs = iter_directory(jar_dirs[0]) + jar_2_cnts, _, jar_2_dirs = iter_directory(jar_dirs[1]) + + contents = {cnt.sha1 for cnt in chain(jar_1_cnts, jar_2_cnts)} + directories = {dir.id for dir in chain(jar_1_dirs, jar_2_dirs)} + + return contents, directories + + +@pytest.fixture +def expected_releases(jar_dirs): + return [ + Release( + name=b"0.1.0", + message=REL_MSGS[0], + author=EMPTY_AUTHOR, + date=REL_DATES[0], + target_type=ModelObjectType.DIRECTORY, + target=jar_dirs[0].hash, + synthetic=True, + metadata=None, + ), + Release( + name=b"0.1.1", + message=REL_MSGS[1], + author=EMPTY_AUTHOR, + date=REL_DATES[1], + target_type=ModelObjectType.DIRECTORY, + target=jar_dirs[1].hash, + synthetic=True, + metadata=None, + ), + ] + + +@pytest.fixture +def expected_snapshot(expected_releases): + return Snapshot( + branches={ + b"HEAD": SnapshotBranch( + target_type=TargetType.ALIAS, + target=b"releases/0.1.1", + ), + b"releases/0.1.0": SnapshotBranch( + target_type=TargetType.RELEASE, + target=expected_releases[0].id, + ), + b"releases/0.1.1": SnapshotBranch( + target_type=TargetType.RELEASE, + target=expected_releases[1].id, + ), + }, + ) + + +@pytest.fixture +def expected_json_metadata(): + return MVN_ARTIFACTS + + +@pytest.fixture +def expected_pom_metadata(data_pom_1, data_pom_2): + return [data_pom_1, data_pom_2] + + +@pytest.fixture(autouse=True) +def network_requests_mock( + requests_mock, + data_jar_1, + data_pom_1, + data_jar_2, + data_pom_2, +): + requests_mock.get(MVN_ARTIFACTS[0]["url"], content=data_jar_1) + requests_mock.get(MVN_ARTIFACTS_POM[0], content=data_pom_1) + requests_mock.get(MVN_ARTIFACTS[1]["url"], content=data_jar_2) + requests_mock.get(MVN_ARTIFACTS_POM[1], content=data_pom_2) + + +def test_maven_loader_visit_with_no_artifact_found(swh_storage, requests_mock_datadir): + origin_url = "https://ftp.g.o/unknown" unknown_artifact_url = "https://ftp.g.o/unknown/8sync-0.1.0.tar.gz" loader = MavenLoader( swh_storage, - unknown_artifact_url, + origin_url, artifacts=[ { "time": "2021-07-18 08:05:05.187000", "url": unknown_artifact_url, # unknown artifact "filename": "8sync-0.1.0.tar.gz", "gid": "al/aldi", "aid": "sprova4j", "version": "0.1.0", "base_url": "https://repo1.maven.org/maven2/", } ], ) actual_load_status = loader.load() assert actual_load_status["status"] == "uneventful" assert actual_load_status["snapshot_id"] is not None expected_snapshot_id = "1a8893e6a86f444e8be8e7bda6cb34fb1735a00e" assert actual_load_status["snapshot_id"] == expected_snapshot_id stats = get_stats(swh_storage) - assert_last_visit_matches( - swh_storage, unknown_artifact_url, status="partial", type="maven" - ) + assert_last_visit_matches(swh_storage, origin_url, status="partial", type="maven") assert { "content": 0, "directory": 0, "origin": 1, "origin_visit": 1, "release": 0, "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats -def test_jar_visit_inconsistent_base_url( +def test_maven_loader_jar_visit_inconsistent_base_url( swh_storage, requests_mock, data_jar_1, data_pom_1 ): """With no prior visit, loading a jar ends up with 1 snapshot""" with pytest.raises(ValueError, match="more than one Maven instance"): MavenLoader( swh_storage, - MVN_ARTIFACT_URLS[0], + MVN_ORIGIN_URL, artifacts=[ MVN_ARTIFACTS[0], {**MVN_ARTIFACTS[1], "base_url": "http://maven.example/"}, ], ) -def test_jar_visit_with_release_artifact_no_prior_visit( - swh_storage, requests_mock, data_jar_1, data_pom_1 +def test_maven_loader_first_visit( + swh_storage, expected_contents_and_directories, expected_snapshot, expected_releases ): """With no prior visit, loading a jar ends up with 1 snapshot""" - requests_mock.get(MVN_ARTIFACT_URLS[0], content=data_jar_1) - requests_mock.get(MVN_ARTIFACTS_POM[0], content=data_pom_1) - loader = MavenLoader( - swh_storage, MVN_ARTIFACT_URLS[0], artifacts=[MVN_ARTIFACTS[0]] - ) + + loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS) actual_load_status = loader.load() assert actual_load_status["status"] == "eventful" - expected_snapshot_first_visit_id = hash_to_bytes( - "c5195b8ebd148649bf094561877964b131ab27e0" - ) - - expected_snapshot = Snapshot( - id=expected_snapshot_first_visit_id, - branches={ - b"HEAD": SnapshotBranch( - target_type=TargetType.ALIAS, - target=b"releases/0.1.0", - ), - b"releases/0.1.0": SnapshotBranch( - target_type=TargetType.RELEASE, - target=hash_to_bytes(_expected_new_release_first_visit), - ), - }, - ) actual_snapshot = snapshot_get_all_branches( swh_storage, hash_to_bytes(actual_load_status["snapshot_id"]) ) - assert actual_snapshot == expected_snapshot + assert actual_load_status["snapshot_id"] == expected_snapshot.id.hex() check_snapshot(expected_snapshot, swh_storage) - assert ( - hash_to_bytes(actual_load_status["snapshot_id"]) - == expected_snapshot_first_visit_id - ) - stats = get_stats(swh_storage) - assert_last_visit_matches( - swh_storage, MVN_ARTIFACT_URLS[0], status="full", type="maven" - ) + assert_last_visit_matches(swh_storage, MVN_ORIGIN_URL, status="full", type="maven") - expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit) + expected_contents, expected_directories = expected_contents_and_directories assert list(swh_storage.content_missing_per_sha1(expected_contents)) == [] - - expected_dirs = map(hash_to_bytes, _expected_new_directories_first_visit) - assert list(swh_storage.directory_missing(expected_dirs)) == [] - - expected_rels = map(hash_to_bytes, {_expected_new_release_first_visit}) - assert list(swh_storage.release_missing(expected_rels)) == [] + assert list(swh_storage.directory_missing(expected_directories)) == [] rel_id = actual_snapshot.branches[b"releases/0.1.0"].target - (rel,) = swh_storage.release_get([rel_id]) - - assert rel == Release( - id=hash_to_bytes(_expected_new_release_first_visit), - name=b"0.1.0", - message=REL_MSG, - author=EMPTY_AUTHOR, - date=REVISION_DATE, - target_type=ModelObjectType.DIRECTORY, - target=hash_to_bytes("6c9de41e4cebb91a8368da1d89ae9873bd540ec3"), - synthetic=True, - metadata=None, - ) + rel2_id = actual_snapshot.branches[b"releases/0.1.1"].target + releases = swh_storage.release_get([rel_id, rel2_id]) + + assert releases == expected_releases assert { - "content": len(_expected_new_contents_first_visit), - "directory": len(_expected_new_directories_first_visit), + "content": len(expected_contents), + "directory": len(expected_directories), "origin": 1, "origin_visit": 1, - "release": 1, + "release": 2, "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats -def test_jar_2_visits_without_change( - swh_storage, requests_mock_datadir, requests_mock, data_jar_2, data_pom_2 +def test_maven_loader_2_visits_without_change( + swh_storage, requests_mock, expected_snapshot ): - """With no prior visit, load a gnu project ends up with 1 snapshot""" - requests_mock.get(MVN_ARTIFACT_URLS[1], content=data_jar_2) - requests_mock.get(MVN_ARTIFACTS_POM[1], content=data_pom_2) - loader = MavenLoader( - swh_storage, MVN_ARTIFACT_URLS[1], artifacts=[MVN_ARTIFACTS[1]] - ) + """With no prior visit, load a maven project ends up with 1 snapshot""" + + loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS) actual_load_status = loader.load() assert actual_load_status["status"] == "eventful" - expected_snapshot_first_visit_id = hash_to_bytes( - "91dcacee7a6d2b54f9cab14bc14cb86d22d2ac2b" - ) - - assert ( - hash_to_bytes(actual_load_status["snapshot_id"]) - == expected_snapshot_first_visit_id - ) + assert actual_load_status["snapshot_id"] == expected_snapshot.id.hex() - assert_last_visit_matches( - swh_storage, MVN_ARTIFACT_URLS[1], status="full", type="maven" - ) + assert_last_visit_matches(swh_storage, MVN_ORIGIN_URL, status="full", type="maven") actual_load_status2 = loader.load() assert actual_load_status2["status"] == "uneventful" assert actual_load_status2["snapshot_id"] is not None assert actual_load_status["snapshot_id"] == actual_load_status2["snapshot_id"] - assert_last_visit_matches( - swh_storage, MVN_ARTIFACT_URLS[1], status="full", type="maven" - ) + assert_last_visit_matches(swh_storage, MVN_ORIGIN_URL, status="full", type="maven") # Make sure we have only one entry in history for the pom fetch, one for # the actual download of jar, and that they're correct. - urls_history = [str(req.url) for req in list(requests_mock_datadir.request_history)] + urls_history = [str(req.url) for req in list(requests_mock.request_history)] assert urls_history == [ - MVN_ARTIFACT_URLS[1], + MVN_ARTIFACTS[0]["url"], + MVN_ARTIFACTS_POM[0], + MVN_ARTIFACTS[1]["url"], MVN_ARTIFACTS_POM[1], ] -def test_metadata(swh_storage, requests_mock, data_jar_1, data_pom_1): +def test_maven_loader_extrinsic_metadata( + swh_storage, expected_releases, expected_json_metadata, expected_pom_metadata +): """With no prior visit, loading a jar ends up with 1 snapshot. Extrinsic metadata is the pom file associated to the source jar. """ - requests_mock.get(MVN_ARTIFACT_URLS[0], content=data_jar_1) - requests_mock.get(MVN_ARTIFACTS_POM[0], content=data_pom_1) - loader = MavenLoader( - swh_storage, MVN_ARTIFACT_URLS[0], artifacts=[MVN_ARTIFACTS[0]] - ) + loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS) actual_load_status = loader.load() assert actual_load_status["status"] == "eventful" - expected_release_id = hash_to_bytes(_expected_new_release_first_visit) - release = swh_storage.release_get([expected_release_id])[0] - assert release is not None + for i, expected_release in enumerate(expected_releases): - release_swhid = CoreSWHID( - object_type=ObjectType.RELEASE, object_id=expected_release_id - ) - directory_swhid = ExtendedSWHID( - object_type=ExtendedObjectType.DIRECTORY, object_id=release.target - ) - metadata_authority = MetadataAuthority( - type=MetadataAuthorityType.FORGE, - url=REPO_BASE_URL, - ) + expected_release_id = expected_release.id + release = swh_storage.release_get([expected_release_id])[0] + assert release is not None - expected_metadata = [ - RawExtrinsicMetadata( - target=directory_swhid, - authority=metadata_authority, - fetcher=MetadataFetcher( - name="swh.loader.package.maven.loader.MavenLoader", - version=__version__, + release_swhid = CoreSWHID( + object_type=ObjectType.RELEASE, object_id=expected_release_id + ) + directory_swhid = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=release.target + ) + metadata_authority = MetadataAuthority( + type=MetadataAuthorityType.FORGE, + url=REPO_BASE_URL, + ) + + expected_metadata = [ + RawExtrinsicMetadata( + target=directory_swhid, + authority=metadata_authority, + fetcher=MetadataFetcher( + name="swh.loader.package.maven.loader.MavenLoader", + version=__version__, + ), + discovery_date=loader.visit_date, + format="maven-pom", + metadata=expected_pom_metadata[i], + origin=MVN_ORIGIN_URL, + release=release_swhid, ), - discovery_date=loader.visit_date, - format="maven-pom", - metadata=_expected_pom_metadata.encode(), - origin=MVN_ARTIFACT_URLS[0], - release=release_swhid, - ), - RawExtrinsicMetadata( - target=directory_swhid, - authority=metadata_authority, - fetcher=MetadataFetcher( - name="swh.loader.package.maven.loader.MavenLoader", - version=__version__, + RawExtrinsicMetadata( + target=directory_swhid, + authority=metadata_authority, + fetcher=MetadataFetcher( + name="swh.loader.package.maven.loader.MavenLoader", + version=__version__, + ), + discovery_date=loader.visit_date, + format="maven-json", + metadata=json.dumps(expected_json_metadata[i]).encode(), + origin=MVN_ORIGIN_URL, + release=release_swhid, ), - discovery_date=loader.visit_date, - format="maven-json", - metadata=json.dumps(_expected_json_metadata).encode(), - origin=MVN_ARTIFACT_URLS[0], - release=release_swhid, - ), - ] + ] - res = swh_storage.raw_extrinsic_metadata_get(directory_swhid, metadata_authority) - assert res.next_page_token is None - assert set(res.results) == set(expected_metadata) + res = swh_storage.raw_extrinsic_metadata_get( + directory_swhid, metadata_authority + ) + assert res.next_page_token is None + assert set(res.results) == set(expected_metadata) -def test_metadata_no_pom(swh_storage, requests_mock, data_jar_1): +def test_maven_loader_extrinsic_metadata_no_pom( + swh_storage, requests_mock, expected_releases, expected_json_metadata +): """With no prior visit, loading a jar ends up with 1 snapshot. Extrinsic metadata is None if the pom file cannot be retrieved. """ - artifact_url = MVN_ARTIFACT_URLS[0] - requests_mock.get(artifact_url, content=data_jar_1) + requests_mock.get(MVN_ARTIFACTS_POM[0], status_code="404") - loader = MavenLoader(swh_storage, artifact_url, artifacts=[MVN_ARTIFACTS[0]]) + loader = MavenLoader(swh_storage, MVN_ORIGIN_URL, artifacts=MVN_ARTIFACTS) actual_load_status = loader.load() assert actual_load_status["status"] == "eventful" - expected_release_id = hash_to_bytes(_expected_new_release_first_visit) + expected_release_id = expected_releases[0].id release = swh_storage.release_get([expected_release_id])[0] assert release is not None release_swhid = CoreSWHID( object_type=ObjectType.RELEASE, object_id=expected_release_id ) directory_swhid = ExtendedSWHID( object_type=ExtendedObjectType.DIRECTORY, object_id=release.target ) metadata_authority = MetadataAuthority( type=MetadataAuthorityType.FORGE, url=REPO_BASE_URL, ) expected_metadata = [ RawExtrinsicMetadata( target=directory_swhid, authority=metadata_authority, fetcher=MetadataFetcher( name="swh.loader.package.maven.loader.MavenLoader", version=__version__, ), discovery_date=loader.visit_date, format="maven-pom", metadata=b"", - origin=artifact_url, + origin=MVN_ORIGIN_URL, release=release_swhid, ), RawExtrinsicMetadata( target=directory_swhid, authority=metadata_authority, fetcher=MetadataFetcher( name="swh.loader.package.maven.loader.MavenLoader", version=__version__, ), discovery_date=loader.visit_date, format="maven-json", - metadata=json.dumps(_expected_json_metadata).encode(), - origin=artifact_url, + metadata=json.dumps(expected_json_metadata[0]).encode(), + origin=MVN_ORIGIN_URL, release=release_swhid, ), ] res = swh_storage.raw_extrinsic_metadata_get(directory_swhid, metadata_authority) assert res.next_page_token is None assert set(res.results) == set(expected_metadata) -def test_jar_extid(): +def test_maven_loader_jar_extid(): """Compute primary key should return the right identity""" - metadata = MVN_ARTIFACTS[0] - # metadata.pop("url", None) - url = MVN_ARTIFACT_URLS[0] - p_info = MavenPackageInfo(url=url, **metadata) + p_info = MavenPackageInfo(**metadata) - expected_manifest = "{gid} {aid} {version} {url} {time}".format( - url=url, **metadata - ).encode() + expected_manifest = "{gid} {aid} {version} {url} {time}".format(**metadata).encode() actual_id = p_info.extid() assert actual_id == ( "maven-jar", 0, hashlib.sha256(expected_manifest).digest(), ) - - -def test_jar_snapshot_append( - swh_storage, - requests_mock_datadir, - requests_mock, - data_jar_1, - data_pom_1, - data_jar_2, - data_pom_2, -): - - # first loading with a first artifact - artifact1 = MVN_ARTIFACTS[0] - url1 = MVN_ARTIFACT_URLS[0] - requests_mock.get(url1, content=data_jar_1) - requests_mock.get(MVN_ARTIFACTS_POM[0], content=data_pom_1) - loader = MavenLoader(swh_storage, url1, [artifact1]) - actual_load_status = loader.load() - assert actual_load_status["status"] == "eventful" - assert actual_load_status["snapshot_id"] is not None - assert_last_visit_matches(swh_storage, url1, status="full", type="maven") - - # check expected snapshot - snapshot = loader.last_snapshot() - assert len(snapshot.branches) == 2 - branch_artifact1_name = f"releases/{artifact1['version']}".encode() - assert b"HEAD" in snapshot.branches - assert branch_artifact1_name in snapshot.branches - assert snapshot.branches[b"HEAD"].target == branch_artifact1_name - - # second loading with a second artifact - artifact2 = MVN_ARTIFACTS[1] - url2 = MVN_ARTIFACT_URLS[1] - requests_mock.get(url2, content=data_jar_2) - requests_mock.get(MVN_ARTIFACTS_POM[1], content=data_pom_2) - loader = MavenLoader(swh_storage, url2, [artifact2]) - actual_load_status = loader.load() - assert actual_load_status["status"] == "eventful" - assert actual_load_status["snapshot_id"] is not None - assert_last_visit_matches(swh_storage, url2, status="full", type="maven") - - # check expected snapshot, should contain a new branch and the - # branch for the first artifact - snapshot = loader.last_snapshot() - assert len(snapshot.branches) == 2 - branch_artifact2_name = f"releases/{artifact2['version']}".encode() - assert b"HEAD" in snapshot.branches - assert branch_artifact2_name in snapshot.branches - assert branch_artifact1_name not in snapshot.branches - assert snapshot.branches[b"HEAD"].target == branch_artifact2_name diff --git a/swh/loader/package/maven/tests/test_tasks.py b/swh/loader/package/maven/tests/test_tasks.py index 479dce0..71773a5 100644 --- a/swh/loader/package/maven/tests/test_tasks.py +++ b/swh/loader/package/maven/tests/test_tasks.py @@ -1,54 +1,86 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import uuid + +import pytest + +from swh.scheduler.model import ListedOrigin, Lister +from swh.scheduler.utils import create_origin_task_dict + MVN_ARTIFACTS = [ { "time": 1626109619335, "url": "https://repo1.maven.org/maven2/al/aldi/sprova4j/0.1.0/" + "sprova4j-0.1.0.jar", "gid": "al.aldi", "aid": "sprova4j", "filename": "sprova4j-0.1.0.jar", "version": "0.1.0", "base_url": "https://repo1.maven.org/maven2/", }, ] +@pytest.fixture(autouse=True) +def celery_worker_and_swh_config(swh_scheduler_celery_worker, swh_config): + pass + + +@pytest.fixture +def maven_lister(): + return Lister(name="maven-lister", instance_name="example", id=uuid.uuid4()) + + +@pytest.fixture +def maven_listed_origin(maven_lister): + return ListedOrigin( + lister_id=maven_lister.id, + url=MVN_ARTIFACTS[0]["url"], + visit_type="maven", + extra_loader_arguments={ + "artifacts": MVN_ARTIFACTS, + }, + ) + + def test_tasks_maven_loader( - mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config + mocker, + swh_scheduler_celery_app, ): mock_load = mocker.patch("swh.loader.package.maven.loader.MavenLoader.load") mock_load.return_value = {"status": "eventful"} res = swh_scheduler_celery_app.send_task( "swh.loader.package.maven.tasks.LoadMaven", kwargs=dict( url=MVN_ARTIFACTS[0]["url"], artifacts=MVN_ARTIFACTS, ), ) assert res res.wait() assert res.successful() assert mock_load.called assert res.result == {"status": "eventful"} -def test_tasks_maven_loader_snapshot_append( - mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config +def test_tasks_maven_loader_for_listed_origin( + mocker, swh_scheduler_celery_app, maven_lister, maven_listed_origin ): mock_load = mocker.patch("swh.loader.package.maven.loader.MavenLoader.load") mock_load.return_value = {"status": "eventful"} + task_dict = create_origin_task_dict(maven_listed_origin, maven_lister) + res = swh_scheduler_celery_app.send_task( "swh.loader.package.maven.tasks.LoadMaven", - kwargs=dict(url=MVN_ARTIFACTS[0]["url"], artifacts=[]), + kwargs=task_dict["arguments"]["kwargs"], ) assert res res.wait() assert res.successful() assert mock_load.called assert res.result == {"status": "eventful"} diff --git a/swh/loader/package/nixguix/tasks.py b/swh/loader/package/nixguix/tasks.py index 39ddf48..44abd63 100644 --- a/swh/loader/package/nixguix/tasks.py +++ b/swh/loader/package/nixguix/tasks.py @@ -1,14 +1,14 @@ -# Copyright (C) 2020-2021 The Software Heritage developers +# Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from celery import shared_task from swh.loader.package.nixguix.loader import NixGuixLoader @shared_task(name=__name__ + ".LoadNixguix") -def load_nixguix(*, url=None): +def load_nixguix(**kwargs): """Load functional (e.g. guix/nix) package""" - return NixGuixLoader.from_configfile(url=url).load() + return NixGuixLoader.from_configfile(**kwargs).load() diff --git a/swh/loader/package/nixguix/tests/test_tasks.py b/swh/loader/package/nixguix/tests/test_tasks.py index edb06e2..5249568 100644 --- a/swh/loader/package/nixguix/tests/test_tasks.py +++ b/swh/loader/package/nixguix/tests/test_tasks.py @@ -1,23 +1,72 @@ -# Copyright (C) 2020-2021 The Software Heritage developers +# Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import uuid + +import pytest + +from swh.scheduler.model import ListedOrigin, Lister +from swh.scheduler.utils import create_origin_task_dict + + +@pytest.fixture(autouse=True) +def celery_worker_and_swh_config(swh_scheduler_celery_worker, swh_config): + pass + + +@pytest.fixture +def nixguix_lister(): + return Lister(name="nixguix-lister", instance_name="example", id=uuid.uuid4()) + + +@pytest.fixture +def nixguix_listed_origin(nixguix_lister): + return ListedOrigin( + lister_id=nixguix_lister.id, + url="https://nixguix.example.org/", + visit_type="nixguix", + ) + def test_tasks_nixguix_loader( - mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config + mocker, + swh_scheduler_celery_app, ): mock_loader = mocker.patch( "swh.loader.package.nixguix.loader.NixGuixLoader.from_configfile" ) mock_loader.return_value = mock_loader mock_loader.load.return_value = {"status": "eventful"} res = swh_scheduler_celery_app.send_task( "swh.loader.package.nixguix.tasks.LoadNixguix", kwargs=dict(url="some-url") ) assert res res.wait() assert res.successful() assert mock_loader.called assert res.result == {"status": "eventful"} + + +def test_tasks_nixguix_loader_for_listed_origin( + mocker, swh_scheduler_celery_app, nixguix_lister, nixguix_listed_origin +): + mock_loader = mocker.patch( + "swh.loader.package.nixguix.loader.NixGuixLoader.from_configfile" + ) + mock_loader.return_value = mock_loader + mock_loader.load.return_value = {"status": "eventful"} + + task_dict = create_origin_task_dict(nixguix_listed_origin, nixguix_lister) + + res = swh_scheduler_celery_app.send_task( + "swh.loader.package.nixguix.tasks.LoadNixguix", + kwargs=task_dict["arguments"]["kwargs"], + ) + assert res + res.wait() + assert res.successful() + assert mock_loader.called + assert res.result == {"status": "eventful"} diff --git a/swh/loader/package/npm/tasks.py b/swh/loader/package/npm/tasks.py index 43fc0bf..991a349 100644 --- a/swh/loader/package/npm/tasks.py +++ b/swh/loader/package/npm/tasks.py @@ -1,14 +1,14 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from celery import shared_task from swh.loader.package.npm.loader import NpmLoader @shared_task(name=__name__ + ".LoadNpm") -def load_npm(*, url: str): +def load_npm(**kwargs): """Load Npm package""" - return NpmLoader.from_configfile(url=url).load() + return NpmLoader.from_configfile(**kwargs).load() diff --git a/swh/loader/package/npm/tests/test_tasks.py b/swh/loader/package/npm/tests/test_tasks.py index 4cdbb36..83b4d5d 100644 --- a/swh/loader/package/npm/tests/test_tasks.py +++ b/swh/loader/package/npm/tests/test_tasks.py @@ -1,21 +1,67 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import uuid + +import pytest + +from swh.scheduler.model import ListedOrigin, Lister +from swh.scheduler.utils import create_origin_task_dict + + +@pytest.fixture(autouse=True) +def celery_worker_and_swh_config(swh_scheduler_celery_worker, swh_config): + pass + + +@pytest.fixture +def npm_lister(): + return Lister(name="npm-lister", instance_name="npm", id=uuid.uuid4()) + + +@pytest.fixture +def npm_listed_origin(npm_lister): + return ListedOrigin( + lister_id=npm_lister.id, + url="https://www.npmjs.com/package/some-package", + visit_type="npm", + ) + def test_tasks_npm_loader( - mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config + mocker, + swh_scheduler_celery_app, ): mock_load = mocker.patch("swh.loader.package.npm.loader.NpmLoader.load") mock_load.return_value = {"status": "eventful"} res = swh_scheduler_celery_app.send_task( "swh.loader.package.npm.tasks.LoadNpm", kwargs=dict(url="https://www.npmjs.com/package/some-package"), ) assert res res.wait() assert res.successful() assert mock_load.called assert res.result == {"status": "eventful"} + + +def test_tasks_npm_loader_for_listed_origin( + mocker, swh_scheduler_celery_app, npm_lister, npm_listed_origin +): + mock_load = mocker.patch("swh.loader.package.npm.loader.NpmLoader.load") + mock_load.return_value = {"status": "eventful"} + + task_dict = create_origin_task_dict(npm_listed_origin, npm_lister) + + res = swh_scheduler_celery_app.send_task( + "swh.loader.package.npm.tasks.LoadNpm", + kwargs=task_dict["arguments"]["kwargs"], + ) + assert res + res.wait() + assert res.successful() + assert mock_load.called + assert res.result == {"status": "eventful"} diff --git a/swh/loader/package/opam/tasks.py b/swh/loader/package/opam/tasks.py index 2ca7f7a..28417a6 100644 --- a/swh/loader/package/opam/tasks.py +++ b/swh/loader/package/opam/tasks.py @@ -1,20 +1,14 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from celery import shared_task from swh.loader.package.opam.loader import OpamLoader @shared_task(name=__name__ + ".LoadOpam") -def load_opam(url, opam_root, opam_instance, opam_url, opam_package): +def load_opam(**kwargs): """Load Opam's artifacts""" - return OpamLoader.from_configfile( - url=url, - opam_root=opam_root, - opam_instance=opam_instance, - opam_url=opam_url, - opam_package=opam_package, - ).load() + return OpamLoader.from_configfile(**kwargs).load() diff --git a/swh/loader/package/opam/tests/test_tasks.py b/swh/loader/package/opam/tests/test_tasks.py index 8fe996c..eec635e 100644 --- a/swh/loader/package/opam/tests/test_tasks.py +++ b/swh/loader/package/opam/tests/test_tasks.py @@ -1,27 +1,78 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import uuid + +import pytest + +from swh.scheduler.model import ListedOrigin, Lister +from swh.scheduler.utils import create_origin_task_dict + +OPAM_LOADER_ARGS = { + "url": "opam+https://opam.ocaml.org/packages/agrid", + "opam_root": "/tmp/test_tasks_opam_loader", + "opam_instance": "test_tasks_opam_loader", + "opam_url": "https://opam.ocaml.org", + "opam_package": "agrid", +} + + +@pytest.fixture(autouse=True) +def celery_worker_and_swh_config(swh_scheduler_celery_worker, swh_config): + pass + + +@pytest.fixture +def opam_lister(): + return Lister(name="opam-lister", instance_name="example", id=uuid.uuid4()) + + +@pytest.fixture +def opam_listed_origin(opam_lister): + return ListedOrigin( + lister_id=opam_lister.id, + url=OPAM_LOADER_ARGS["url"], + visit_type="opam", + extra_loader_arguments={ + k: v for k, v in OPAM_LOADER_ARGS.items() if k != "url" + }, + ) + def test_tasks_opam_loader( - mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config + mocker, + swh_scheduler_celery_app, +): + mock_load = mocker.patch("swh.loader.package.opam.loader.OpamLoader.load") + mock_load.return_value = {"status": "eventful"} + + res = swh_scheduler_celery_app.send_task( + "swh.loader.package.opam.tasks.LoadOpam", + kwargs=OPAM_LOADER_ARGS, + ) + assert res + res.wait() + assert res.successful() + assert mock_load.called + assert res.result == {"status": "eventful"} + + +def test_tasks_opam_loader_for_listed_origin( + mocker, swh_scheduler_celery_app, opam_lister, opam_listed_origin ): mock_load = mocker.patch("swh.loader.package.opam.loader.OpamLoader.load") mock_load.return_value = {"status": "eventful"} + task_dict = create_origin_task_dict(opam_listed_origin, opam_lister) + res = swh_scheduler_celery_app.send_task( "swh.loader.package.opam.tasks.LoadOpam", - args=( - "opam+https://opam.ocaml.org/packages/agrid", # url - "/tmp/test_tasks_opam_loader", # opam_root - "test_tasks_opam_loader", # opam_instance - "https://opam.ocaml.org", # opam_url - "agrid", # opam_package - ), + kwargs=task_dict["arguments"]["kwargs"], ) assert res res.wait() assert res.successful() assert mock_load.called assert res.result == {"status": "eventful"} diff --git a/swh/loader/package/pypi/tasks.py b/swh/loader/package/pypi/tasks.py index 45a60c3..1917e54 100644 --- a/swh/loader/package/pypi/tasks.py +++ b/swh/loader/package/pypi/tasks.py @@ -1,14 +1,14 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from celery import shared_task from swh.loader.package.pypi.loader import PyPILoader @shared_task(name=__name__ + ".LoadPyPI") -def load_pypi(*, url=None): +def load_pypi(**kwargs): """Load PyPI package""" - return PyPILoader.from_configfile(url=url).load() + return PyPILoader.from_configfile(**kwargs).load() diff --git a/swh/loader/package/pypi/tests/test_tasks.py b/swh/loader/package/pypi/tests/test_tasks.py index 35387b0..c294762 100644 --- a/swh/loader/package/pypi/tests/test_tasks.py +++ b/swh/loader/package/pypi/tests/test_tasks.py @@ -1,20 +1,69 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import uuid + +import pytest + +from swh.scheduler.model import ListedOrigin, Lister +from swh.scheduler.utils import create_origin_task_dict + + +@pytest.fixture(autouse=True) +def celery_worker_and_swh_config(swh_scheduler_celery_worker, swh_config): + pass + + +@pytest.fixture +def pypi_lister(): + return Lister(name="pypi-lister", instance_name="example", id=uuid.uuid4()) + + +@pytest.fixture +def pypi_listed_origin(pypi_lister): + return ListedOrigin( + lister_id=pypi_lister.id, + url="https://pypi.example.org/package", + visit_type="pypi", + ) + def test_tasks_pypi_loader( - mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config + mocker, + swh_scheduler_celery_app, ): mock_load = mocker.patch("swh.loader.package.pypi.loader.PyPILoader.load") mock_load.return_value = {"status": "eventful"} res = swh_scheduler_celery_app.send_task( "swh.loader.package.pypi.tasks.LoadPyPI", kwargs=dict(url="some-url") ) assert res res.wait() assert res.successful() assert mock_load.called assert res.result == {"status": "eventful"} + + +def test_tasks_pypi_loader_for_listed_origin( + mocker, + swh_scheduler_celery_app, + pypi_lister, + pypi_listed_origin, +): + mock_load = mocker.patch("swh.loader.package.pypi.loader.PyPILoader.load") + mock_load.return_value = {"status": "eventful"} + + task_dict = create_origin_task_dict(pypi_listed_origin, pypi_lister) + + res = swh_scheduler_celery_app.send_task( + "swh.loader.package.pypi.tasks.LoadPyPI", + kwargs=task_dict["arguments"]["kwargs"], + ) + assert res + res.wait() + assert res.successful() + assert mock_load.called + assert res.result == {"status": "eventful"}