diff --git a/.gitignore b/.gitignore index b349889..0154454 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,14 @@ *.pyc *.sw? *~ /.coverage /.coverage.* .eggs/ __pycache__ *.egg-info/ build/ dist/ version.txt .tox/ .mypy_cache/ +.pc/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7e4b296..9a5ebc0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,51 +1,44 @@ repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.4.0 - hooks: - - id: trailing-whitespace - - id: check-json - - id: check-yaml + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.1.0 + hooks: + - id: trailing-whitespace + - id: check-json + - id: check-yaml -- repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.3 - hooks: - - id: flake8 + - repo: https://gitlab.com/pycqa/flake8 + rev: 4.0.1 + hooks: + - id: flake8 -- repo: https://github.com/codespell-project/codespell - rev: v1.16.0 - hooks: - - id: codespell - exclude: ^(swh/loader/package/.*[/]+tests/data/.*)$ - entry: codespell --ignore-words-list=iff + - repo: https://github.com/codespell-project/codespell + rev: v2.1.0 + hooks: + - id: codespell + name: Check source code spelling + exclude: ^(swh/loader/package/.*[/]+tests/data/.*)$ + entry: codespell --ignore-words-list=iff + stages: [commit] + - id: codespell + name: Check commit message spelling + stages: [commit-msg] -- repo: local - hooks: - - id: mypy - name: mypy - entry: mypy - args: [swh] - pass_filenames: false - language: system - types: [python] + - repo: local + hooks: + - id: mypy + name: mypy + entry: mypy + args: [swh] + pass_filenames: false + language: system + types: [python] -- repo: https://github.com/PyCQA/isort - rev: 5.5.2 - hooks: - - id: isort - -- repo: https://github.com/python/black - rev: 19.10b0 - hooks: - - id: black - -# unfortunately, we are far from being able to enable this... 
-# - repo: https://github.com/PyCQA/pydocstyle.git -# rev: 4.0.0 -# hooks: -# - id: pydocstyle -# name: pydocstyle -# description: pydocstyle is a static analysis tool for checking compliance with Python docstring conventions. -# entry: pydocstyle --convention=google -# language: python -# types: [python] + - repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + - repo: https://github.com/python/black + rev: 19.10b0 + hooks: + - id: black diff --git a/PKG-INFO b/PKG-INFO index 1c6d837..3389dbe 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,56 +1,56 @@ Metadata-Version: 2.1 Name: swh.loader.core -Version: 2.5.0 +Version: 2.5.4 Summary: Software Heritage Base Loader Home-page: https://forge.softwareheritage.org/diffusion/DLDBASE Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-loader-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-loader-core/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing License-File: LICENSE License-File: AUTHORS Software Heritage - Loader foundations ====================================== The Software Heritage Loader Core is a low-level loading utilities and helpers used by :term:`loaders `. The main entry points are classes: - :class:`swh.loader.core.loader.BaseLoader` for loaders (e.g. svn) - :class:`swh.loader.core.loader.DVCSLoader` for DVCS loaders (e.g. hg, git, ...) 
- :class:`swh.loader.package.loader.PackageLoader` for Package loaders (e.g. PyPI, Npm, ...) Package loaders --------------- This package also implements many package loaders directly, out of convenience, as they usually are quite similar and each fits in a single file. They all roughly follow these steps, explained in the :py:meth:`swh.loader.package.loader.PackageLoader.load` documentation. See the :ref:`package-loader-tutorial` for details. VCS loaders ----------- Unlike package loaders, VCS loaders remain in separate packages, as they often need more advanced conversions and very VCS-specific operations. This usually involves getting the branches of a repository and recursively loading revisions in the history (and directory trees in these revisions), until a known revision is found diff --git a/debian/changelog b/debian/changelog index 6b3a5e6..786cf16 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,1631 +1,1672 @@ -swh-loader-core (2.5.0-1~swh1~bpo10+1) buster-swh; urgency=medium +swh-loader-core (2.5.4-1~swh2) unstable-swh; urgency=medium - * Rebuild for buster-swh + * Bump new release with opam tests deactivated - -- Software Heritage autobuilder (on jenkins-debian1) Tue, 08 Feb 2022 09:52:17 +0000 + -- Antoine R. Dumont (@ardumont) Fri, 25 Feb 2022 12:40:40 +0100 + +swh-loader-core (2.5.4-1~swh1) unstable-swh; urgency=medium + + * New upstream release 2.5.4 - (tagged by Antoine R. Dumont + (@ardumont) on 2022-02-25 10:23:51 + +0100) + * Upstream changes: - v2.5.4 - loader/opam/tests: Do not run + actual opam init command call + + -- Software Heritage autobuilder (on jenkins-debian1) Fri, 25 Feb 2022 09:28:10 +0000 + +swh-loader-core (2.5.3-1~swh1) unstable-swh; urgency=medium + + * New upstream release 2.5.3 - (tagged by Antoine R. 
Dumont + (@ardumont) on 2022-02-24 16:02:53 + +0100) + * Upstream changes: - v2.5.3 - opam: Allow build to run the + opam init completely + + -- Software Heritage autobuilder (on jenkins-debian1) Thu, 24 Feb 2022 15:07:20 +0000 + +swh-loader-core (2.5.2-1~swh1) unstable-swh; urgency=medium + + * New upstream release 2.5.2 - (tagged by Valentin Lorentz + on 2022-02-24 09:52:26 +0100) + * Upstream changes: - v2.5.2 - * deposit: Remove unused + raw_info + + -- Software Heritage autobuilder (on jenkins-debian1) Thu, 24 Feb 2022 08:57:52 +0000 + +swh-loader-core (2.5.1-1~swh1) unstable-swh; urgency=medium + + * New upstream release 2.5.1 - (tagged by Antoine R. Dumont + (@ardumont) on 2022-02-16 15:27:02 + +0100) + * Upstream changes: - v2.5.1 - Add URL and directory to CLI + loader status echo - Fix load_maven scheduling task name - + docs: Fix typo detected with codespell - pre-commit: Bump hooks + and add new one to check commit message spelling + + -- Software Heritage autobuilder (on jenkins-debian1) Wed, 16 Feb 2022 14:30:47 +0000 swh-loader-core (2.5.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.5.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2022-02-08 10:46:14 +0100) * Upstream changes: - v2.5.0 - Move visit date helper from hg loader to core -- Software Heritage autobuilder (on jenkins-debian1) Tue, 08 Feb 2022 09:49:53 +0000 swh-loader-core (2.4.1-1~swh1) unstable-swh; urgency=medium * New upstream release 2.4.1 - (tagged by Nicolas Dandrimont on 2022-02-03 14:12:05 +0100) * Upstream changes: - Release swh.loader.core 2.4.1 - fix Person mangling -- Software Heritage autobuilder (on jenkins-debian1) Thu, 03 Feb 2022 13:17:35 +0000 swh-loader-core (2.3.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.3.0 - (tagged by Nicolas Dandrimont on 2022-01-24 11:18:43 +0100) * Upstream changes: - Release swh.loader.core - Stop using the deprecated 'TimestampWithTimezone.offset' attribute - Include clone_with_timeout utility from swh.loader.mercurial -- Software Heritage autobuilder (on jenkins-debian1) Mon, 24 Jan 2022 10:22:35 +0000 swh-loader-core (2.2.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.2.0 - (tagged by Antoine R. Dumont (@ardumont) on 2022-01-18 14:33:08 +0100) * Upstream changes: - v2.2.0 - tests: Replace 'offset' and 'negative_utc' with 'offset_bytes' - deposit: Remove 'negative_utc' from test data - tests: Use TimestampWithTimezone.from_datetime() instead of the constructor - Add releases notes (from user-provided Atom document) to release messages. 
- deposit: Strip 'offset_bytes' from date dicts to support swh-model 4.0.0 - Pin mypy and drop type annotations which makes mypy unhappy -- Software Heritage autobuilder (on jenkins-debian1) Tue, 18 Jan 2022 15:52:53 +0000 swh-loader-core (2.1.1-1~swh1) unstable-swh; urgency=medium * New upstream release 2.1.1 - (tagged by Valentin Lorentz on 2021-12-09 17:14:12 +0100) * Upstream changes: - v2.1.1 - * nixguix: Fix crash when filtering extids on archives that were already loaded, but only from different URLs -- Software Heritage autobuilder (on jenkins-debian1) Thu, 09 Dec 2021 16:17:54 +0000 swh-loader-core (2.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.1.0 - (tagged by Valentin Lorentz on 2021-12-09 16:34:51 +0100) * Upstream changes: - v2.1.0 - * maven: various refactorings - * nixguix: Filter out releases with URLs different from the expected one -- Software Heritage autobuilder (on jenkins-debian1) Thu, 09 Dec 2021 15:38:14 +0000 swh-loader-core (2.0.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.0.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-12-07 15:53:23 +0100) * Upstream changes: - v2.0.0 - package-loaders: Add support for extid versions, and bump it for Debian - debian: Remove the extrinsic version from release names - debian: Fix confusion between the two versions -- Software Heritage autobuilder (on jenkins-debian1) Tue, 07 Dec 2021 14:57:19 +0000 swh-loader-core (1.3.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.3.0 - (tagged by Antoine Lambert on 2021-12-07 10:54:49 +0100) * Upstream changes: - version 1.3.0 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 07 Dec 2021 09:58:53 +0000 swh-loader-core (1.2.1-1~swh1) unstable-swh; urgency=medium * New upstream release 1.2.1 - (tagged by Antoine R. 
Dumont (@ardumont) on 2021-12-03 16:15:32 +0100) * Upstream changes: - v1.2.1 - package.loader: Deduplicate extid target -- Software Heritage autobuilder (on jenkins-debian1) Fri, 03 Dec 2021 15:19:13 +0000 swh-loader-core (1.2.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.2.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-12-03 12:16:04 +0100) * Upstream changes: - v1.2.0 - debian: Rename loading task function to fix scheduling - debian: Handle extra sha1 sum in source package metadata - debian: Remove unused date parameter of DebianLoader - package.loader: Deduplicate target SWHIDs - package-loader-tutorial: Update to mention releases instead of revisions - package-loader-tutorial: Add a checklist - package-loader-tutorial: Highlight the recommendation to submit the loader early. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 03 Dec 2021 11:19:52 +0000 swh-loader-core (1.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.1.0 - (tagged by Valentin Lorentz on 2021-11-22 11:58:11 +0100) * Upstream changes: - v1.1.0 - * Package loader: Uniformize author and message -- Software Heritage autobuilder (on jenkins-debian1) Mon, 22 Nov 2021 11:01:45 +0000 swh-loader-core (1.0.1-1~swh1) unstable-swh; urgency=medium * New upstream release 1.0.1 - (tagged by Valentin Lorentz on 2021-11-10 14:47:52 +0100) * Upstream changes: - v1.0.1 - * utils: Add types and let log instruction do the formatting - * Fix tests when run by gbp on Sid. -- Software Heritage autobuilder (on jenkins-debian1) Wed, 10 Nov 2021 13:53:43 +0000 swh-loader-core (1.0.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.0.0 - (tagged by Valentin Lorentz on 2021-11-10 14:25:24 +0100) * Upstream changes: - v1.0.0 - Main change: thismakes package loaders write releases instead of revisions - Other more-or-less related changes: - * Add missing documentation for `get_metadata_authority`. 
- * opam: Write package definitions to the extrinsic metadata storage - * deposit: Remove 'parent' deposit - * cleanup tests and unused code - * Document how each package loader populates fields. - * Refactor package loaders to make the version part of BasePackageInfo -- Software Heritage autobuilder (on jenkins-debian1) Wed, 10 Nov 2021 13:38:43 +0000 swh-loader-core (0.25.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.25.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-09-29 09:19:10 +0200) * Upstream changes: - v0.25.0 - Allow opam loader to actually use multi-instance opam root - opam: Define a initialize_opam_root parameter for opam loader -- Software Heritage autobuilder (on jenkins-debian1) Wed, 29 Sep 2021 07:26:12 +0000 swh-loader-core (0.23.5-1~swh1) unstable-swh; urgency=medium * New upstream release 0.23.5 - (tagged by Antoine R. Dumont (@ardumont) on 2021-09-24 17:31:22 +0200) * Upstream changes: - v0.23.5 - opam: Initialize opam root directory outside the constructor -- Software Heritage autobuilder (on jenkins-debian1) Fri, 24 Sep 2021 15:34:52 +0000 swh-loader-core (0.23.4-1~swh1) unstable-swh; urgency=medium * New upstream release 0.23.4 - (tagged by Antoine R. 
Dumont (@ardumont) on 2021-09-20 11:53:11 +0200) * Upstream changes: - v0.23.4 - Ensure that filename fallback out of an url is properly sanitized -- Software Heritage autobuilder (on jenkins-debian1) Mon, 20 Sep 2021 09:56:31 +0000 swh-loader-core (0.23.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.23.3 - (tagged by Antoine Lambert on 2021-09-16 10:47:40 +0200) * Upstream changes: - version 0.23.3 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 16 Sep 2021 08:51:47 +0000 swh-loader-core (0.23.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.23.2 - (tagged by Valentin Lorentz on 2021-08-12 12:22:44 +0200) * Upstream changes: - v0.23.2 - * deposit: Update status_detail on loader failure -- Software Heritage autobuilder (on jenkins-debian1) Thu, 12 Aug 2021 10:25:44 +0000 swh-loader-core (0.23.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.23.1 - (tagged by Antoine R. Dumont (@ardumont) on 2021-08-05 16:11:02 +0200) * Upstream changes: - v0.23.1 - Fix pypi upload issue. 
-- Software Heritage autobuilder (on jenkins-debian1) Thu, 05 Aug 2021 14:20:37 +0000 swh-loader-core (0.22.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.22.3 - (tagged by Valentin Lorentz on 2021-06-25 14:50:40 +0200) * Upstream changes: - v0.22.3 - * Use the postgresql class to instantiate storage in tests - * package-loader-tutorial: Add anchor so it can be referenced from swh-docs -- Software Heritage autobuilder (on jenkins-debian1) Fri, 25 Jun 2021 12:57:33 +0000 swh-loader-core (0.22.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.22.2 - (tagged by Antoine Lambert on 2021-06-10 16:11:30 +0200) * Upstream changes: - version 0.22.2 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 10 Jun 2021 14:19:06 +0000 swh-loader-core (0.22.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.22.1 - (tagged by Antoine Lambert on 2021-05-27 14:02:35 +0200) * Upstream changes: - version 0.22.1 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 27 May 2021 12:20:04 +0000 swh-loader-core (0.22.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.22.0 - (tagged by Valentin Lorentz on 2021-04-15 15:13:56 +0200) * Upstream changes: - v0.22.0 - Documentation: - * Document the big picture view of VCS and package loaders - * Add a package loader tutorial. - * Write an overview of how to write VCS loaders. 
- * Fix various Sphinx warnings - Package loaders: - * Add sha512 as a valid field in dsc metadata - * package loaders: Stop reading/writing Revision.metadata -- Software Heritage autobuilder (on jenkins-debian1) Thu, 15 Apr 2021 13:18:13 +0000 swh-loader-core (0.21.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.21.0 - (tagged by Valentin Lorentz on 2021-03-30 17:19:13 +0200) * Upstream changes: - v0.21.0 - * tests: recompute ids when evolving RawExtrinsicMetadata objects, to support swh-model 2.0.0 - * deposit.loader: Make archive.tar the default_filename - * debian: Make resolve_revision_from use the sha256 of the .dsc - * package.loader.*: unify package "cache"/deduplication using ExtIDs - * package.loader: Lookup packages from the ExtID storage - * package.loader: Write to the ExtID storage -- Software Heritage autobuilder (on jenkins-debian1) Tue, 30 Mar 2021 15:26:35 +0000 swh-loader-core (0.20.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.20.0 - (tagged by Valentin Lorentz on 2021-03-02 10:52:18 +0100) * Upstream changes: - v0.20.0 - * RawExtrinsicMetadata: update to use the API in swh-model 1.0.0 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 02 Mar 2021 09:57:21 +0000 swh-loader-core (0.19.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.19.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-02-25 15:52:12 +0100) * Upstream changes: - v0.19.0 - deposit: Make deposit loader deal with tarball as well - deposit: Update deposit status when the load status is 'partial' - Make finalize_visit a method instead of nested function. -- Software Heritage autobuilder (on jenkins-debian1) Thu, 25 Feb 2021 14:55:54 +0000 swh-loader-core (0.18.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.18.1 - (tagged by Antoine R. 
Dumont (@ardumont) on 2021-02-19 18:02:58 +0100) * Upstream changes: - v0.18.1 - nixguix: Fix missing max_content_size constructor parameter -- Software Heritage autobuilder (on jenkins-debian1) Fri, 19 Feb 2021 17:06:33 +0000 swh-loader-core (0.18.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.18.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-02-17 13:13:24 +0100) * Upstream changes: - v0.18.0 - core.loader: Merge Loader into BaseLoader - Unify loader instantiation - nixguix: Ensure interaction with the origin url for edge case tests -- Software Heritage autobuilder (on jenkins-debian1) Wed, 17 Feb 2021 12:16:47 +0000 swh-loader-core (0.17.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.17.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-02-11 11:20:55 +0100) * Upstream changes: - v0.17.0 - package: Mark visit as not_found when relevant - package: Mark visit status as failed when relevant - core: Allow vcs loaders to deal with not_found status - core: Mark visit status as failed when relevant - loader: Make loader write the origin_visit_status' type -- Software Heritage autobuilder (on jenkins-debian1) Thu, 11 Feb 2021 10:23:42 +0000 swh-loader-core (0.16.0-1~swh2) unstable-swh; urgency=medium * Bump dependencies -- Antoine R. Dumont (@ardumont) Wed, 03 Feb 2021 14:25:26 +0100 swh-loader-core (0.16.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.16.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2021-02-03 14:14:01 +0100) * Upstream changes: - v0.16.0 - Adapt origin_get_latest_visit_status according to latest api change - Add a cli section in the doc - tox.ini: Add swh.core[testing] requirement - Small docstring improvements in the deposit loader code -- Software Heritage autobuilder (on jenkins-debian1) Wed, 03 Feb 2021 13:17:30 +0000 swh-loader-core (0.15.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.15.0 - (tagged by Nicolas Dandrimont on 2020-11-03 17:21:21 +0100) * Upstream changes: - Release swh-loader-core v0.15.0 - Attach raw extrinsic metadata to directories, not revisions - Handle a bunch of deprecation warnings: - explicit args in swh.objstorage get_objstorage - id -> target for raw extrinsic metadata objects - positional arguments for storage.raw_extrinsic_metadata_get -- Software Heritage autobuilder (on jenkins-debian1) Tue, 03 Nov 2020 16:26:20 +0000 swh-loader-core (0.14.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.0 - (tagged by Valentin Lorentz on 2020-10-16 18:23:28 +0200) * Upstream changes: - v0.14.0 - * npm: write metadata on revisions instead of snapshots. - * pypi: write metadata on revisions instead of snapshots. - * deposit.loader: Avoid unnecessary metadata json transformation -- Software Heritage autobuilder (on jenkins-debian1) Fri, 16 Oct 2020 16:26:14 +0000 swh-loader-core (0.13.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.13.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-10-02 16:54:05 +0200) * Upstream changes: - v0.13.1 - core.loader: Allow config parameter passing through constructor - tox.ini: pin black to the pre-commit version (19.10b0) to avoid flip-flops -- Software Heritage autobuilder (on jenkins-debian1) Fri, 02 Oct 2020 14:55:59 +0000 swh-loader-core (0.13.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.13.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-10-02 13:18:55 +0200) * Upstream changes: - v0.13.0 - package.loader: Migrate away from SWHConfig mixin - core.loader: Migrate away from SWHConfig mixin - Expose deposit configuration only within the deposit tests -- Software Heritage autobuilder (on jenkins-debian1) Fri, 02 Oct 2020 11:21:55 +0000 swh-loader-core (0.12.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.12.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-10-01 16:03:45 +0200) * Upstream changes: - v0.12.0 - deposit: Adapt loader to send extrinsic raw metadata to the metadata storage - core.loader: Log information about origin currently being ingested - Adapt cli declaration entrypoint to swh.core 0.3 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 01 Oct 2020 14:04:59 +0000 swh-loader-core (0.11.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.11.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-09-18 10:19:56 +0200) * Upstream changes: - v0.11.0 - loader: Stop materializing full lists of objects to be stored - tests.get_stats: Don't return a 'person' count - python: Reorder imports with isort - pre-commit: Add isort hook and configuration - pre-commit: Update flake8 hook configuration - cli: speedup the `swh` cli command startup time -- Software Heritage autobuilder (on jenkins-debian1) Fri, 18 Sep 2020 09:12:18 +0000 swh-loader-core (0.10.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.10.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-09-04 13:19:29 +0200) * Upstream changes: - v0.10.0 - loader: Adapt to latest storage revision_get change - origin/master Rename metadata format 'original-artifact-json' to 'original-artifacts-json'. - Tell pytest not to recurse in dotdirs. - package loader: Add the 'url' to the 'original_artifact' extrinsic metadata. - Write 'original_artifact' metadata to the extrinsic metadata storage. - Move parts of _load_revision to a new _load_directory method. 
- tests: Don't use naive datetimes. - package.loader: Split the warning message into multiple chunks - Replace calls to snapshot_get with snapshot_get_all_branches. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 04 Sep 2020 11:28:09 +0000 swh-loader-core (0.9.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.9.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-08-08 14:47:52 +0200) * Upstream changes: - v0.9.1 - nixguix: Make the unsupported artifact extensions configurable - package.loader: Log a failure summary report at the end of the task -- Software Heritage autobuilder (on jenkins-debian1) Sat, 08 Aug 2020 12:51:33 +0000 swh-loader-core (0.9.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.9.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-08-07 22:57:14 +0200) * Upstream changes: - v0.9.0 - nixguix: Filter out unsupported artifact extensions - swh.loader.tests: Use snapshot_get_all_branches in check_snapshot - test_npm: Adapt content_get_metadata call to content_get - npm: Fix assertion to use the correct storage api -- Software Heritage autobuilder (on jenkins-debian1) Fri, 07 Aug 2020 21:00:40 +0000 swh-loader-core (0.8.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.8.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-08-06 16:48:38 +0200) * Upstream changes: - v0.8.1 - Adapt code according to storage signature -- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 Aug 2020 14:50:39 +0000 swh-loader-core (0.8.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.8.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-08-05 10:16:36 +0200) * Upstream changes: - v0.8.0 - archive: fix docstring - nixguix: Fix docstring - nixguix: Align error message formatting using f-string - nixguix: Fix format issue in error message - Convert the 'metadata' and 'info' cached-properties/lazy-attributes into methods - cran: fix call to logger.warning - pypi: Load the content of the API's response as extrinsic snapshot metadata - Add a default value for RawExtrinsicMetadataCore.discovery_date - npm: Load the content of the API's response as extrinsic snapshot metadata - Make retrieve_sources use generic api_info instead of duplicating its code - nixguix: Load the content of sources.json as extrinsic snapshot metadata - Update tests to accept PagedResult from storage.raw_extrinsic_metadata_get -- Software Heritage autobuilder (on jenkins-debian1) Wed, 05 Aug 2020 08:19:20 +0000 swh-loader-core (0.7.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.7.3 - (tagged by Valentin Lorentz on 2020-07-30 19:16:21 +0200) * Upstream changes: - v0.7.3 - core.loader: Fix Iterable/List typing issues - package.loader: Fix type warning -- Software Heritage autobuilder (on jenkins-debian1) Thu, 30 Jul 2020 17:23:57 +0000 swh-loader-core (0.7.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.7.2 - (tagged by Valentin Lorentz on 2020-07-29 11:41:39 +0200) * Upstream changes: - v0.7.2 - * Fix typo in message logged on extrinsic metadata loading errors. - * Don't pass non-sequence iterables to the storage API. -- Software Heritage autobuilder (on jenkins-debian1) Wed, 29 Jul 2020 09:45:52 +0000 swh-loader-core (0.7.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.7.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-28 12:14:02 +0200) * Upstream changes: - v0.7.1 - Apply rename of object_metadata to raw_extrinsic_metadata. 
-- Software Heritage autobuilder (on jenkins-debian1) Tue, 28 Jul 2020 10:16:56 +0000 swh-loader-core (0.6.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-23 11:12:29 +0200) * Upstream changes: - v0.6.1 - npm.loader: Fix null author parsing corner case - npm.loader: Fix author parsing corner case - npm.loader: Extract _author_str function + add types, tests - core.loader: docs: Update origin_add reference -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 Jul 2020 09:15:41 +0000 swh-loader-core (0.6.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.0 - (tagged by Valentin Lorentz on 2020-07-20 13:23:22 +0200) * Upstream changes: - v0.6.0 - * Use the new object_metadata_add endpoint instead of origin_metadata_add. - * Apply renaming of MetadataAuthorityType.DEPOSIT to MetadataAuthorityType.DEPOSIT_CLIENT. -- Software Heritage autobuilder (on jenkins-debian1) Mon, 20 Jul 2020 11:27:53 +0000 swh-loader-core (0.5.10-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.10 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-17 15:10:42 +0200) * Upstream changes: - v0.5.10 - test_init: Decrease assertion checks so debian package builds fine - test_nixguix: Simplify the nixguix specific check_snapshot function -- Software Heritage autobuilder (on jenkins-debian1) Fri, 17 Jul 2020 13:13:19 +0000 swh-loader-core (0.5.9-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.9 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-17 11:52:38 +0200) * Upstream changes: - v0.5.9 - test.check_snapshot: Drop accepting using dict for snapshot comparison - test: Check against snapshot model object -- Software Heritage autobuilder (on jenkins-debian1) Fri, 17 Jul 2020 09:55:12 +0000 swh-loader-core (0.5.8-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.8 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-07-16 17:18:17 +0200) * Upstream changes: - v0.5.8 - test_init: Use snapshot object -- Software Heritage autobuilder (on jenkins-debian1) Thu, 16 Jul 2020 15:20:49 +0000 swh-loader-core (0.5.7-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.7 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-16 16:10:57 +0200) * Upstream changes: - v0.5.7 - test_init: Fix tests using the latest swh-storage fixture -- Software Heritage autobuilder (on jenkins-debian1) Thu, 16 Jul 2020 14:14:59 +0000 swh-loader-core (0.5.5-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.5 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-15 12:34:09 +0200) * Upstream changes: - v0.5.5 - check_snapshot: Check existence down to contents - Expose a pytest_plugin module so other loaders can reuse for tests - pytest: Remove no longer needed pytest setup - Fix branches types in tests - Small code improvement in package/loader.py -- Software Heritage autobuilder (on jenkins-debian1) Wed, 15 Jul 2020 10:37:11 +0000 swh-loader-core (0.5.4-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.4 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-10 09:52:21 +0200) * Upstream changes: - v0.5.4 - Clean up the swh.scheduler / swh.storage pytest plugin imports -- Software Heritage autobuilder (on jenkins-debian1) Fri, 10 Jul 2020 07:54:56 +0000 swh-loader-core (0.5.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.3 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-09 09:46:21 +0200) * Upstream changes: - v0.5.3 - Update the revision metadata field as an immutable dict - tests: Use dedicated storage and scheduler fixtures - loaders.tests: Simplify and add coverage to check_snapshot -- Software Heritage autobuilder (on jenkins-debian1) Thu, 09 Jul 2020 07:48:33 +0000 swh-loader-core (0.5.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.2 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-07-07 12:29:17 +0200) * Upstream changes: - v0.5.2 - nixguix/loader: Check further the source entry only if it's valid - nixguix/loader: Allow version both as string or integer - Move remaining common test utility functions to top-level arborescence - Move common test utility function to the top-level arborescence - Define common test helper function - Reuse swh.model.from_disk.iter_directory function -- Software Heritage autobuilder (on jenkins-debian1) Tue, 07 Jul 2020 10:31:36 +0000 swh-loader-core (0.5.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-01 12:32:54 +0200) * Upstream changes: - v0.5.1 - Use origin_add instead of deprecated origin_add_one endpoint - Migrate to use object's "object_type" field when computing objects -- Software Heritage autobuilder (on jenkins-debian1) Wed, 01 Jul 2020 10:34:59 +0000 swh-loader-core (0.5.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-06-29 13:18:41 +0200) * Upstream changes: - v0.5.0 - loader*: Drop obsolete origin visit fields -- Software Heritage autobuilder (on jenkins-debian1) Mon, 29 Jun 2020 11:20:59 +0000 swh-loader-core (0.4.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.4.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-06-23 15:02:20 +0200) * Upstream changes: - v0.4.0 - loader: Retrieve latest snapshot with snapshot-get-latest function -- Software Heritage autobuilder (on jenkins-debian1) Tue, 23 Jun 2020 13:14:09 +0000 swh-loader-core (0.3.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.2 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-06-22 15:13:05 +0200) * Upstream changes: - v0.3.2 - Add helper function to ensure loader visit are as expected -- Software Heritage autobuilder (on jenkins-debian1) Mon, 22 Jun 2020 13:15:41 +0000 swh-loader-core (0.3.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.1 - (tagged by Antoine Lambert on 2020-06-12 16:43:18 +0200) * Upstream changes: - version 0.3.1 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 12 Jun 2020 14:47:42 +0000 swh-loader-core (0.3.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-06-12 11:05:41 +0200) * Upstream changes: - v0.3.0 - Migrate to new storage.origin_visit_add endpoint - loader: Migrate to origin visit status - test_deposits: Fix origin_metadata_get which is a paginated endpoint - Fix a potential UnboundLocalError in clean_dangling_folders() -- Software Heritage autobuilder (on jenkins-debian1) Fri, 12 Jun 2020 09:08:17 +0000 swh-loader-core (0.2.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.0 - (tagged by David Douard on 2020-06-04 14:20:08 +0200) * Upstream changes: - v0.2.0 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 04 Jun 2020 12:25:57 +0000 swh-loader-core (0.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.0 - (tagged by Nicolas Dandrimont on 2020-05-29 16:01:11 +0200) * Upstream changes: - Release swh.loader.core v0.1.0 - Make sure partial visits don't reference unloaded snapshots - Ensure proper behavior when loading into partial archives (e.g. staging) - Improve test coverage -- Software Heritage autobuilder (on jenkins-debian1) Fri, 29 May 2020 14:05:36 +0000 swh-loader-core (0.0.97-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.97 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-05-26 14:22:51 +0200) * Upstream changes: - v0.0.97 - nixguix: catch and log artifact resolution failures - nixguix: Override known_artifacts to filter out "evaluation" branch - nixguix.tests: Add missing __init__ file -- Software Heritage autobuilder (on jenkins-debian1) Tue, 26 May 2020 12:25:35 +0000 swh-loader-core (0.0.96-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.96 - (tagged by Valentin Lorentz on 2020-05-19 18:42:23 +0200) * Upstream changes: - v0.0.96 - * Pass bytes instead a dict to origin_metadata_add. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 19 May 2020 16:45:03 +0000 swh-loader-core (0.0.95-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.95 - (tagged by Valentin Lorentz on 2020-05-19 14:44:01 +0200) * Upstream changes: - v0.0.95 - * Use the new swh-storage API for storing metadata. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 19 May 2020 12:47:48 +0000 swh-loader-core (0.0.94-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.94 - (tagged by Antoine R. Dumont (@ardumont) on 2020-05-15 12:49:22 +0200) * Upstream changes: - v0.0.94 - deposit: Adapt loader to use the latest deposit update api - tests: Use proper date initialization - setup.py: add documentation link -- Software Heritage autobuilder (on jenkins-debian1) Fri, 15 May 2020 10:52:16 +0000 swh-loader-core (0.0.93-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.93 - (tagged by Antoine R. Dumont (@ardumont) on 2020-04-23 16:43:16 +0200) * Upstream changes: - v0.0.93 - deposit.loader: Build revision out of the deposit api read metadata -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 Apr 2020 14:46:48 +0000 swh-loader-core (0.0.92-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.92 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-04-23 11:49:30 +0200) * Upstream changes: - v0.0.92 - deposit.loader: Fix revision metadata redundancy in deposit metadata - loader.deposit: Clarify FIXME intent - test_nixguix: Remove the incorrect fixme - test_nixguix: Add a fixme note on test_loader_two_visits - package.nixguix: Ensure the revisions are structurally sound -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 Apr 2020 09:52:18 +0000 swh-loader-core (0.0.91-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.91 - (tagged by Antoine R. Dumont (@ardumont) on 2020-04-21 15:59:55 +0200) * Upstream changes: - v0.0.91 - deposit.loader: Fix committer date appropriately - tests_deposit: Define specific requests_mock_datadir fixture - nixguix: Move helper function below the class definition - setup: Update the minimum required runtime python3 version -- Software Heritage autobuilder (on jenkins-debian1) Tue, 21 Apr 2020 14:02:51 +0000 swh-loader-core (0.0.90-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.90 - (tagged by Antoine R. Dumont (@ardumont) on 2020-04-15 14:27:01 +0200) * Upstream changes: - v0.0.90 - Improve exception handling -- Software Heritage autobuilder (on jenkins-debian1) Wed, 15 Apr 2020 12:30:07 +0000 swh-loader-core (0.0.89-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.89 - (tagged by Antoine R. Dumont (@ardumont) on 2020-04-14 15:48:15 +0200) * Upstream changes: - v0.0.89 - package.utils: Define a timeout on download connections - package.loader: Clear proxy buffer state when failing to load revision - Fix a couple of storage args deprecation warnings - cli: Sort loaders list and fix some tests - Add a pyproject.toml file to target py37 for black - Enable black -- Software Heritage autobuilder (on jenkins-debian1) Tue, 14 Apr 2020 15:30:08 +0000 swh-loader-core (0.0.88-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.88 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-04-03 15:52:07 +0200) * Upstream changes: - v0.0.88 - v0.0.88 nixguix: validate and clean sources.json structure -- Software Heritage autobuilder (on jenkins-debian1) Fri, 03 Apr 2020 13:54:24 +0000 swh-loader-core (0.0.87-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.87 - (tagged by Antoine R. Dumont (@ardumont) on 2020-04-02 14:37:37 +0200) * Upstream changes: - v0.0.87 - nixguix: rename the `url` source attribute to `urls` - nixguix: rename the test file - nixguix: add the integrity attribute in release metadata -- Software Heritage autobuilder (on jenkins-debian1) Thu, 02 Apr 2020 12:39:58 +0000 swh-loader-core (0.0.86-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.86 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-26 16:15:24 +0100) * Upstream changes: - v0.0.86 - core.loader: Remove origin_visit_update call from DVCSLoader class -- Software Heritage autobuilder (on jenkins-debian1) Thu, 26 Mar 2020 15:19:29 +0000 swh-loader-core (0.0.85-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.85 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-26 15:36:58 +0100) * Upstream changes: - v0.0.85 - core.loader: Allow core loader to update origin_visit in one call - Rename the functional loader to nixguix loader -- Software Heritage autobuilder (on jenkins-debian1) Thu, 26 Mar 2020 14:43:17 +0000 swh-loader-core (0.0.84-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.84 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-24 11:29:49 +0100) * Upstream changes: - v0.0.84 - test: Use storage endpoint to check latest origin visit status - package.loader: Fix status visit to 'partial' - package.loader: add a test to reproduce EOFError error -- Software Heritage autobuilder (on jenkins-debian1) Tue, 24 Mar 2020 10:32:55 +0000 swh-loader-core (0.0.83-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.83 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-03-23 15:16:14 +0100) * Upstream changes: - v0.0.83 - Make the swh.loader.package exception handling more granular - package.loader: Reference a snapshot on partial visit - package.loader: Extract a _load_snapshot method - functional: create a branch named evaluation pointing to the evaluation commit - package.loader: add extra_branches method -- Software Heritage autobuilder (on jenkins-debian1) Mon, 23 Mar 2020 14:19:43 +0000 swh-loader-core (0.0.82-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.82 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-18 11:55:48 +0100) * Upstream changes: - v0.0.82 - functional.loader: Add loader - package.loader: ignore non tarball source -- Software Heritage autobuilder (on jenkins-debian1) Wed, 18 Mar 2020 10:59:38 +0000 swh-loader-core (0.0.81-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.81 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-16 13:14:33 +0100) * Upstream changes: - v0.0.81 - Migrate to latest storage.origin_visit_add api change - Move Person parsing to swh- model. -- Software Heritage autobuilder (on jenkins-debian1) Mon, 16 Mar 2020 12:17:43 +0000 swh-loader-core (0.0.80-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.80 - (tagged by Valentin Lorentz on 2020-02-28 17:05:14 +0100) * Upstream changes: - v0.0.80 - * use swh-model objects instead of dicts. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 28 Feb 2020 16:10:06 +0000 swh-loader-core (0.0.79-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.79 - (tagged by Antoine R. Dumont (@ardumont) on 2020-02-25 11:40:05 +0100) * Upstream changes: - v0.0.79 - Move revision loading logic to its own function. - Use swh-storage validation proxy earlier in the pipeline. - Use swh-storage validation proxy. - Add missing __init__.py and fix tests. 
-- Software Heritage autobuilder (on jenkins-debian1) Tue, 25 Feb 2020 10:48:07 +0000 swh-loader-core (0.0.78-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.78 - (tagged by Antoine R. Dumont (@ardumont) on 2020-02-06 15:28:11 +0100) * Upstream changes: - v0.0.78 - tests: Use new get_storage signature - loader.core.converters: Prefer the with open pattern to read file - test_converters: Add coverage on prepare_contents method - test_converters: Migrate to pytest - loader.core/package: Call storage's (skipped_)content_add endpoints -- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 Feb 2020 15:09:05 +0000 swh-loader-core (0.0.77-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.77 - (tagged by Antoine R. Dumont (@ardumont) on 2020-01-30 10:32:08 +0100) * Upstream changes: - v0.0.77 - loader.npm: If no upload time provided, use artifact's mtime if provided - loader.npm: Fail ingestion if at least 1 artifact has no upload time -- Software Heritage autobuilder (on jenkins-debian1) Thu, 30 Jan 2020 09:37:58 +0000 swh-loader-core (0.0.76-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.76 - (tagged by Antoine R. Dumont (@ardumont) on 2020-01-28 13:07:30 +0100) * Upstream changes: - v0.0.76 - npm.loader: Skip artifacts with no intrinsic metadata - pypi.loader: Skip artifacts with no intrinsic metadata - package.loader: Fix edge case when some listing returns no content - core.loader: Drop retro- compatibility class names - loader.tests: Add filter and buffer proxy storage - docs: Fix sphinx warnings - README: Update class names -- Software Heritage autobuilder (on jenkins-debian1) Tue, 28 Jan 2020 12:11:07 +0000 swh-loader-core (0.0.75-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.75 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-01-16 14:14:29 +0100) * Upstream changes: - v0.0.75 - cran.loader: Align cran loader with other package loaders -- Software Heritage autobuilder (on jenkins-debian1) Thu, 16 Jan 2020 13:17:30 +0000 swh-loader-core (0.0.74-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.74 - (tagged by Antoine R. Dumont (@ardumont) on 2020-01-15 15:30:13 +0100) * Upstream changes: - v0.0.74 - Drop no longer used retrying dependency - core.loader: Clean up indirection and retry behavior - tests: Use retry proxy storage in loaders - core.loader: Drop dead code - cran.loader: Fix parsing description file error -- Software Heritage autobuilder (on jenkins-debian1) Wed, 15 Jan 2020 14:33:57 +0000 swh-loader-core (0.0.73-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.73 - (tagged by Antoine R. Dumont (@ardumont) on 2020-01-09 10:00:21 +0100) * Upstream changes: - v0.0.73 - package.cran: Name CRAN task appropriately -- Software Heritage autobuilder (on jenkins-debian1) Thu, 09 Jan 2020 09:05:07 +0000 swh-loader-core (0.0.72-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.72 - (tagged by Antoine R. Dumont (@ardumont) on 2020-01-06 16:37:58 +0100) * Upstream changes: - v0.0.72 - package.loader: Fail fast when unable to create origin/origin_visit - cran.loader: Add implementation -- Software Heritage autobuilder (on jenkins-debian1) Mon, 06 Jan 2020 15:50:08 +0000 swh-loader-core (0.0.71-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.71 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-20 14:22:31 +0100) * Upstream changes: - v0.0.71 - package.utils: Drop unneeded hashes from download computation -- Software Heritage autobuilder (on jenkins-debian1) Fri, 20 Dec 2019 13:26:09 +0000 swh-loader-core (0.0.70-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.70 - (tagged by Antoine R. 
Dumont (@ardumont) on 2019-12-20 11:32:09 +0100) * Upstream changes: - v0.0.70 - debian.loader: Improve and fix revision resolution's corner cases -- Software Heritage autobuilder (on jenkins-debian1) Fri, 20 Dec 2019 10:39:34 +0000 swh-loader-core (0.0.69-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.69 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-12 16:21:59 +0100) * Upstream changes: - v0.0.69 - loader.core: Fix correctly loader initialization -- Software Heritage autobuilder (on jenkins-debian1) Thu, 12 Dec 2019 15:26:13 +0000 swh-loader-core (0.0.68-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.68 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-12 15:45:21 +0100) * Upstream changes: - v0.0.68 - loader.core: Fix initialization issue in dvcs loaders -- Software Heritage autobuilder (on jenkins-debian1) Thu, 12 Dec 2019 14:49:12 +0000 swh-loader-core (0.0.67-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.67 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-12 14:02:47 +0100) * Upstream changes: - v0.0.67 - loader.core: Type methods - loader.core: Transform data input into list - loader.core: Add missing conversion step on content -- Software Heritage autobuilder (on jenkins-debian1) Thu, 12 Dec 2019 13:07:47 +0000 swh-loader-core (0.0.66-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.66 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-12 12:01:14 +0100) * Upstream changes: - v0.0.66 - Drop deprecated behavior -- Software Heritage autobuilder (on jenkins-debian1) Thu, 12 Dec 2019 11:05:17 +0000 swh-loader-core (0.0.65-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.65 - (tagged by Antoine R. 
Dumont (@ardumont) on 2019-12-12 11:42:46 +0100) * Upstream changes: - v0.0.65 - loader.cli: Improve current implementation - tasks: Enforce kwargs use in task message -- Software Heritage autobuilder (on jenkins-debian1) Thu, 12 Dec 2019 10:51:02 +0000 swh-loader-core (0.0.64-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.64 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-10 09:49:06 +0100) * Upstream changes: - v0.0.64 - requirements-test: Add missing test dependency - tests: Refactor using pytest-mock's mocker fixture - loader.cli: Add tests around cli - package.npm: Align loader instantiation - loader.cli: Reference new loader cli -- Software Heritage autobuilder (on jenkins-debian1) Tue, 10 Dec 2019 08:56:02 +0000 swh-loader-core (0.0.63-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.63 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-05 16:01:49 +0100) * Upstream changes: - v0.0.63 - Add missing inclusion instruction -- Software Heritage autobuilder (on jenkins-debian1) Thu, 05 Dec 2019 15:05:39 +0000 swh-loader-core (0.0.62-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.62 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-05 15:46:46 +0100) * Upstream changes: - v0.0.62 - Move package loaders to their own namespace -- Software Heritage autobuilder (on jenkins-debian1) Thu, 05 Dec 2019 14:50:19 +0000 swh-loader-core (0.0.61-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.61 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-28 17:25:49 +0100) * Upstream changes: - v0.0.61 - pypi: metadata -> revision: Deal with previous metadata format - npm: metadata -> revision: Deal with previous metadata format -- Software Heritage autobuilder (on jenkins-debian1) Thu, 28 Nov 2019 16:29:47 +0000 swh-loader-core (0.0.60-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.60 - (tagged by Antoine R. 
Dumont (@ardumont) on 2019-11-26 12:09:28 +0100) * Upstream changes: - v0.0.60 - package.deposit: Fix revision- get inconsistency - package.deposit: Provide parents in any case - package.deposit: Fix url computation issue - utils: Work around header issue during download -- Software Heritage autobuilder (on jenkins-debian1) Tue, 26 Nov 2019 11:18:41 +0000 swh-loader-core (0.0.59-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.59 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-22 18:11:33 +0100) * Upstream changes: - v0.0.59 - npm: Explicitly retrieve the revision date from extrinsic metadata -- Software Heritage autobuilder (on jenkins-debian1) Fri, 22 Nov 2019 17:15:34 +0000 swh-loader-core (0.0.58-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.58 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-22 12:08:10 +0100) * Upstream changes: - v0.0.58 - package.pypi: Filter out non- sdist package type -- Software Heritage autobuilder (on jenkins-debian1) Fri, 22 Nov 2019 11:11:56 +0000 swh-loader-core (0.0.57-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.57 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-22 11:26:11 +0100) * Upstream changes: - v0.0.57 - package.pypi: Fix project url computation edge case - Use pkg_resources to get the package version instead of vcversioner -- Software Heritage autobuilder (on jenkins-debian1) Fri, 22 Nov 2019 10:31:11 +0000 swh-loader-core (0.0.56-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.56 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-21 16:12:46 +0100) * Upstream changes: - v0.0.56 - package.tasks: Rename appropriately load_deb_package task type name - Fix typos reported by codespell - Add a pre-commit config file -- Software Heritage autobuilder (on jenkins-debian1) Thu, 21 Nov 2019 15:16:23 +0000 swh-loader-core (0.0.55-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.55 - (tagged by Antoine R. 
Dumont (@ardumont) on 2019-11-21 13:51:03 +0100) * Upstream changes: - v0.0.55 - package.tasks: Rename load_archive into load_archive_files - Migrate tox.ini to extras = xxx instead of deps = .[testing] - Merge tox test environments -- Software Heritage autobuilder (on jenkins-debian1) Thu, 21 Nov 2019 12:56:07 +0000 swh-loader-core (0.0.54-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.54 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-21 11:29:20 +0100) * Upstream changes: - v0.0.54 - loader.package.deposit: Drop swh.deposit.client requirement - Include all requirements in MANIFEST.in -- Software Heritage autobuilder (on jenkins-debian1) Thu, 21 Nov 2019 10:32:23 +0000 swh-loader-core (0.0.53-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.53 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-20 14:26:36 +0100) * Upstream changes: - v0.0.53 - loader.package.tasks: Document tasks - Define correctly the setup.py's entry_points -- Software Heritage autobuilder (on jenkins-debian1) Wed, 20 Nov 2019 13:30:10 +0000 swh-loader-core (0.0.52-1~swh3) unstable-swh; urgency=medium * Update dh-python version constraint -- Antoine R. Dumont (@ardumont) Wed, 20 Nov 2019 12:03:00 +0100 swh-loader-core (0.0.52-1~swh2) unstable-swh; urgency=medium * Add egg-info to pybuild.testfiles. -- Antoine R. Dumont (@ardumont) Wed, 20 Nov 2019 11:42:42 +0100 swh-loader-core (0.0.52-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.52 - (tagged by Antoine R. 
Dumont (@ardumont) on 2019-11-19 15:15:40 +0100) * Upstream changes: - v0.0.52 - Ensure BufferedLoader and UnbufferedLoader do flush their storage - loader.package: Register loader package tasks - package.tasks: Rename debian task to load_deb -- Software Heritage autobuilder (on jenkins-debian1) Tue, 19 Nov 2019 14:18:41 +0000 swh-loader-core (0.0.51-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.51 - (tagged by David Douard on 2019-11-18 17:05:17 +0100) * Upstream changes: - v0.0.51 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 18 Nov 2019 16:09:44 +0000 swh-loader-core (0.0.50-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.50 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-13 15:56:55 +0100) * Upstream changes: - v0.0.50 - package.loader: Check snapshot_id is set as returned value - package.loader: Ensure the origin visit type is set appropriately - package.loader: Fix serialization issue - package.debian: Align origin_visit type to 'deb' as in production -- Software Heritage autobuilder (on jenkins-debian1) Wed, 13 Nov 2019 15:04:37 +0000 swh-loader-core (0.0.49-1~swh2) unstable-swh; urgency=medium * Update dependencies -- Antoine R. Dumont Fri, 08 Nov 2019 14:07:20 +0100 swh-loader-core (0.0.49-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.49 - (tagged by Antoine R. 
Dumont (@ardumont) on 2019-11-08 13:21:56 +0100) * Upstream changes: - v0.0.49 - New package loader implementations: archive, pypi, npm, deposit, debian -- Software Heritage autobuilder (on jenkins-debian1) Fri, 08 Nov 2019 12:29:47 +0000 swh-loader-core (0.0.48-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.48 - (tagged by Stefano Zacchiroli on 2019-10-01 16:49:39 +0200) * Upstream changes: - v0.0.48 - * typing: minimal changes to make a no-op mypy run pass -- Software Heritage autobuilder (on jenkins-debian1) Tue, 01 Oct 2019 14:52:59 +0000 swh-loader-core (0.0.47-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.47 - (tagged by Antoine Lambert on 2019-10-01 11:32:50 +0200) * Upstream changes: - version 0.0.47: Workaround HashCollision errors -- Software Heritage autobuilder (on jenkins-debian1) Tue, 01 Oct 2019 09:35:38 +0000 swh-loader-core (0.0.46-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.46 - (tagged by Antoine R. Dumont (@ardumont) on 2019-09-06 18:30:42 +0200) * Upstream changes: - v0.0.46 - pytest.ini: Remove warnings about our custom markers - pep8: Fix log.warning calls - core/loader: Fix get_save_data_path implementation - Fix validation errors in test. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 06 Sep 2019 16:33:13 +0000 swh-loader-core (0.0.45-1~swh2) unstable-swh; urgency=medium * Fix missing build dependency -- Antoine R. Dumont (@ardumont) Tue, 03 Sep 2019 14:12:13 +0200 swh-loader-core (0.0.45-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.45 - (tagged by Antoine R. 
Dumont (@ardumont) on 2019-09-03 10:38:36 +0200) * Upstream changes: - v0.0.45 - loader: Provide visit type when calling origin_visit_add - loader: Drop keys 'perms' and 'path' from content before sending to the - storage - swh.loader.package: Implement GNU loader - docs: add code of conduct document -- Software Heritage autobuilder (on jenkins-debian1) Tue, 03 Sep 2019 08:41:49 +0000 swh-loader-core (0.0.44-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.44 - (tagged by Valentin Lorentz on 2019-06-25 12:18:27 +0200) * Upstream changes: - Drop use of deprecated methods fetch_history_* -- Software Heritage autobuilder (on jenkins-debian1) Wed, 26 Jun 2019 09:40:59 +0000 swh-loader-core (0.0.43-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.43 - (tagged by Valentin Lorentz on 2019-06-18 16:21:58 +0200) * Upstream changes: - Use origin urls instead of origin ids. -- Software Heritage autobuilder (on jenkins-debian1) Wed, 19 Jun 2019 09:33:53 +0000 swh-loader-core (0.0.42-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.42 - (tagged by David Douard on 2019-05-20 11:28:49 +0200) * Upstream changes: - v0.0.42 - update/fix requirements -- Software Heritage autobuilder (on jenkins-debian1) Mon, 20 May 2019 09:33:47 +0000 swh-loader-core (0.0.41-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.41 - (tagged by Antoine R. 
Dumont (@ardumont) on 2019-04-11 11:46:00 +0200) * Upstream changes: - v0.0.41 - core.loader: Migrate to latest snapshot_add, origin_visit_update api - core.loader: Count only the effectively new objects ingested - test_utils: Add coverage on utils module -- Software Heritage autobuilder (on jenkins-debian1) Thu, 11 Apr 2019 09:52:55 +0000 swh-loader-core (0.0.40-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.40 - (tagged by Antoine Lambert on 2019-03-29 10:57:14 +0100) * Upstream changes: - version 0.0.40 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 29 Mar 2019 10:02:37 +0000 swh-loader-core (0.0.39-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.39 - (tagged by Antoine R. Dumont (@ardumont) on 2019-01-30 11:10:39 +0100) * Upstream changes: - v0.0.39 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 30 Jan 2019 10:13:56 +0000 swh-loader-core (0.0.35-1~swh1) unstable-swh; urgency=medium * v0.0.35 * tests: Initialize tox.ini use * tests, debian/*: Migrate to pytest -- Antoine R. Dumont (@ardumont) Tue, 23 Oct 2018 15:47:22 +0200 swh-loader-core (0.0.34-1~swh1) unstable-swh; urgency=medium * v0.0.34 * setup: prepare for PyPI upload * README.md: Simplify module description * core.tests: Install tests fixture for derivative loaders to use -- Antoine R. Dumont (@ardumont) Tue, 09 Oct 2018 14:11:29 +0200 swh-loader-core (0.0.33-1~swh1) unstable-swh; urgency=medium * v0.0.33 * loader/utils: Add clean_dangling_folders function to ease clean up * loader/core: Add optional pre_cleanup for dangling files cleaning -- Antoine R. Dumont (@ardumont) Fri, 09 Mar 2018 14:41:17 +0100 swh-loader-core (0.0.32-1~swh1) unstable-swh; urgency=medium * v0.0.32 * Improve origin_visit initialization step * Properly sandbox the prepare statement so that if it breaks, we can * update appropriately the visit with the correct status -- Antoine R. 
Dumont (@ardumont) Wed, 07 Mar 2018 11:06:27 +0100 swh-loader-core (0.0.31-1~swh1) unstable-swh; urgency=medium * Release swh.loader.core v0.0.31 * Remove backwards-compatibility when sending snapshots -- Nicolas Dandrimont Tue, 13 Feb 2018 18:52:20 +0100 swh-loader-core (0.0.30-1~swh1) unstable-swh; urgency=medium * Release swh.loader.core v0.0.30 * Update Debian metadata for snapshot-related breakage -- Nicolas Dandrimont Tue, 06 Feb 2018 14:22:53 +0100 swh-loader-core (0.0.29-1~swh1) unstable-swh; urgency=medium * Release swh.loader.core v0.0.29 * Replace occurrences with snapshots * Enhance logging on error cases -- Nicolas Dandrimont Tue, 06 Feb 2018 14:13:11 +0100 swh-loader-core (0.0.28-1~swh1) unstable-swh; urgency=medium * v0.0.28 * Add stateless loader base class * Remove bare exception handlers -- Antoine R. Dumont (@ardumont) Tue, 19 Dec 2017 17:48:09 +0100 swh-loader-core (0.0.27-1~swh1) unstable-swh; urgency=medium * v0.0.27 * Migrate from indexer's indexer_configuration to storage's tool notion. -- Antoine R. Dumont (@ardumont) Thu, 07 Dec 2017 10:36:23 +0100 swh-loader-core (0.0.26-1~swh1) unstable-swh; urgency=medium * v0.0.26 * Fix send_provider method -- Antoine R. Dumont (@ardumont) Tue, 05 Dec 2017 15:40:57 +0100 swh-loader-core (0.0.25-1~swh1) unstable-swh; urgency=medium * v0.0.25 * swh.loader.core: Fix to retrieve the provider_id as an actual id * swh.loader.core: Fix log format error * swh.loader.core: Align log message according to conventions -- Antoine R. Dumont (@ardumont) Wed, 29 Nov 2017 12:55:45 +0100 swh-loader-core (0.0.24-1~swh1) unstable-swh; urgency=medium * v0.0.24 * Added metadata injection possible from loader core -- Antoine R. Dumont (@ardumont) Fri, 24 Nov 2017 11:35:40 +0100 swh-loader-core (0.0.23-1~swh1) unstable-swh; urgency=medium * v0.0.23 * loader: Fix dangling data flush -- Antoine R. 
Dumont (@ardumont) Tue, 07 Nov 2017 16:25:20 +0100 swh-loader-core (0.0.22-1~swh1) unstable-swh; urgency=medium * v0.0.22 * core.loader: Use the global setup set in swh.core.config * core.loader: Properly batch object insertions for big requests -- Antoine R. Dumont (@ardumont) Mon, 30 Oct 2017 18:50:00 +0100 swh-loader-core (0.0.21-1~swh1) unstable-swh; urgency=medium * v0.0.21 * swh.loader.core: Only send origin if not already sent before -- Antoine R. Dumont (@ardumont) Tue, 24 Oct 2017 16:30:53 +0200 swh-loader-core (0.0.20-1~swh1) unstable-swh; urgency=medium * v0.0.20 * Permit to add 'post_load' actions in loaders -- Antoine R. Dumont (@ardumont) Fri, 13 Oct 2017 14:30:37 +0200 swh-loader-core (0.0.19-1~swh1) unstable-swh; urgency=medium * v0.0.19 * Permit to add 'post_load' actions in loaders -- Antoine R. Dumont (@ardumont) Fri, 13 Oct 2017 14:14:14 +0200 swh-loader-core (0.0.18-1~swh1) unstable-swh; urgency=medium * Release swh.loader.core version 0.0.18 * Update packaging runes -- Nicolas Dandrimont Thu, 12 Oct 2017 18:07:53 +0200 swh-loader-core (0.0.17-1~swh1) unstable-swh; urgency=medium * Release swh.loader.core v0.0.17 * Allow iterating when fetching and storing data * Allow overriding the status of the loaded visit * Allow overriding the status of the load itself -- Nicolas Dandrimont Wed, 11 Oct 2017 16:38:29 +0200 swh-loader-core (0.0.16-1~swh1) unstable-swh; urgency=medium * Release swh.loader.core v0.0.16 * Migrate from swh.model.git to swh.model.from_disk -- Nicolas Dandrimont Fri, 06 Oct 2017 14:46:41 +0200 swh-loader-core (0.0.15-1~swh1) unstable-swh; urgency=medium * v0.0.15 * docs: Add sphinx apidoc generation skeleton * docs: Add a simple README.md explaining the module's goal * swh.loader.core.loader: Unify origin_visit add/update function call -- Antoine R. Dumont (@ardumont) Fri, 29 Sep 2017 11:47:37 +0200 swh-loader-core (0.0.14-1~swh1) unstable-swh; urgency=medium * v0.0.14 * Add the blake2s256 hash computation -- Antoine R. 
Dumont (@ardumont) Sat, 25 Mar 2017 18:20:52 +0100 swh-loader-core (0.0.13-1~swh1) unstable-swh; urgency=medium * v0.0.13 * Improve core loader's interface api -- Antoine R. Dumont (@ardumont) Wed, 22 Feb 2017 13:43:54 +0100 swh-loader-core (0.0.12-1~swh1) unstable-swh; urgency=medium * v0.0.12 * Update storage configuration reading -- Antoine R. Dumont (@ardumont) Thu, 15 Dec 2016 18:34:41 +0100 swh-loader-core (0.0.11-1~swh1) unstable-swh; urgency=medium * v0.0.11 * d/control: Bump dependency to latest storage * Fix: Objects can be injected even though global loading failed * Populate the counters in fetch_history * Open open/close fetch_history function in the core loader -- Antoine R. Dumont (@ardumont) Wed, 24 Aug 2016 14:38:55 +0200 swh-loader-core (0.0.10-1~swh1) unstable-swh; urgency=medium * v0.0.10 * d/control: Update dependency -- Antoine R. Dumont (@ardumont) Sat, 11 Jun 2016 02:26:50 +0200 swh-loader-core (0.0.9-1~swh1) unstable-swh; urgency=medium * v0.0.9 * Improve default task that initialize storage as well -- Antoine R. Dumont (@ardumont) Fri, 10 Jun 2016 15:12:14 +0200 swh-loader-core (0.0.8-1~swh1) unstable-swh; urgency=medium * v0.0.8 * Migrate specific converter to the right module * Fix dangling parameter -- Antoine R. Dumont (@ardumont) Wed, 08 Jun 2016 18:09:23 +0200 swh-loader-core (0.0.7-1~swh1) unstable-swh; urgency=medium * v0.0.7 * Fix on revision conversion -- Antoine R. Dumont (@ardumont) Wed, 08 Jun 2016 16:19:02 +0200 swh-loader-core (0.0.6-1~swh1) unstable-swh; urgency=medium * v0.0.6 * d/control: Bump dependency on swh-model * d/control: Add missing description * Keep the abstraction for all entities * Align parameter definition order * Fix missing option in DEFAULT ones * Decrease verbosity * Fix missing origin_id assignment * d/rules: Add target to run tests during packaging -- Antoine R. Dumont (@ardumont) Wed, 08 Jun 2016 16:00:40 +0200 swh-loader-core (0.0.5-1~swh1) unstable-swh; urgency=medium * v0.0.5 -- Antoine R. 
Dumont (@ardumont) Wed, 25 May 2016 12:17:06 +0200 swh-loader-core (0.0.4-1~swh1) unstable-swh; urgency=medium * v0.0.4 * Rename package from python3-swh.loader to python3-swh.loader.core -- Antoine R. Dumont (@ardumont) Wed, 25 May 2016 11:44:48 +0200 swh-loader-core (0.0.3-1~swh1) unstable-swh; urgency=medium * v0.0.3 * Improve default configuration * Rename package from swh-loader-vcs to swh-loader -- Antoine R. Dumont (@ardumont) Wed, 25 May 2016 11:23:06 +0200 swh-loader-core (0.0.2-1~swh1) unstable-swh; urgency=medium * v0.0.2 * Fix: Flush data even when no data is sent to swh-storage -- Antoine R. Dumont (@ardumont) Tue, 24 May 2016 16:41:49 +0200 swh-loader-core (0.0.1-1~swh1) unstable-swh; urgency=medium * Initial release * v0.0.1 -- Antoine R. Dumont (@ardumont) Wed, 13 Apr 2016 16:54:47 +0200 diff --git a/debian/patches/deactivate-opam-tests.diff b/debian/patches/deactivate-opam-tests.diff new file mode 100644 index 0000000..10c7489 --- /dev/null +++ b/debian/patches/deactivate-opam-tests.diff @@ -0,0 +1,54 @@ +Index: swh-loader-core/swh/loader/package/opam/tests/test_opam.py +=================================================================== +--- swh-loader-core.orig/swh/loader/package/opam/tests/test_opam.py ++++ swh-loader-core/swh/loader/package/opam/tests/test_opam.py +@@ -96,6 +96,9 @@ def fake_opam_root(mocker, tmpdir, datad + assert mock_init.called, "This should be called when loader use this fixture" + + ++@pytest.mark.skip( ++ reason="opam > 2.0 not supported, see https://forge.softwareheritage.org/T3976" ++) + def test_opam_loader_no_opam_repository_fails(swh_storage, tmpdir, datadir): + """Running opam loader without a prepared opam repository fails""" + opam_url = f"file://{datadir}/fake_opam_repo" +@@ -122,6 +125,9 @@ def test_opam_loader_no_opam_repository_ + assert actual_load_status == {"status": "failed"} + + ++@pytest.mark.skip( ++ reason="opam > 2.0 not supported, see https://forge.softwareheritage.org/T3976" ++) + def 
test_opam_loader_one_version( + tmpdir, requests_mock_datadir, fake_opam_root, datadir, swh_storage + ): +@@ -192,6 +198,9 @@ def test_opam_loader_one_version( + } == stats + + ++@pytest.mark.skip( ++ reason="opam > 2.0 not supported, see https://forge.softwareheritage.org/T3976" ++) + def test_opam_loader_many_version( + tmpdir, requests_mock_datadir, fake_opam_root, datadir, swh_storage + ): +@@ -248,6 +257,9 @@ def test_opam_loader_many_version( + check_snapshot(expected_snapshot, swh_storage) + + ++@pytest.mark.skip( ++ reason="opam > 2.0 not supported, see https://forge.softwareheritage.org/T3976" ++) + def test_opam_release( + tmpdir, requests_mock_datadir, fake_opam_root, swh_storage, datadir + ): +@@ -321,6 +333,9 @@ def test_opam_release( + assert release.author == expected_package_info.author + + ++@pytest.mark.skip( ++ reason="opam > 2.0 not supported, see https://forge.softwareheritage.org/T3976" ++) + def test_opam_metadata( + tmpdir, requests_mock_datadir, fake_opam_root, swh_storage, datadir + ): diff --git a/debian/patches/series b/debian/patches/series new file mode 100644 index 0000000..852ad56 --- /dev/null +++ b/debian/patches/series @@ -0,0 +1 @@ +deactivate-opam-tests.diff diff --git a/docs/package-loader-tutorial.rst b/docs/package-loader-tutorial.rst index 3defab7..304936c 100644 --- a/docs/package-loader-tutorial.rst +++ b/docs/package-loader-tutorial.rst @@ -1,699 +1,699 @@ .. _package-loader-tutorial: Package Loader Tutorial ======================= In this tutorial, we will see how to write a loader for |swh| that loads packages from a package manager, such as PyPI or Debian's. First, you should be familiar with Python, unit-testing, |swh|'s :ref:`data-model` and :ref:`architecture`, and go through the :ref:`developer-setup`. Creating the files hierarchy ---------------------------- Once this is done, you should create a new directory (ie. a (sub)package from Python's point of view) for you loader. 
It can be either a subdirectory of ``swh-loader-core/swh/loader/package/`` like the other package loaders, or it can be in its own package. If you choose the latter, you should also create the base files of any Python package (such as ``setup.py``); you should import them from the `swh-py-template`_ repository. In the rest of this tutorial, we will assume you chose the former and your loader is named "New Loader", so your package loader is in ``swh-loader-core/swh/loader/package/newloader/``. Next, you should create boilerplate files needed for SWH loaders: ``__init__.py``, ``tasks.py``, ``tests/__init__.py``, and ``tests/test_tasks.py``; copy them from an existing package, such as ``swh-loader-core/swh/loader/package/pypi/``, and replace the names in those with your loader's. Finally, create an `entrypoint`_ in :file:`setup.py`, so your loader can be discovered by the SWH Celery workers:: entry_points=""" [swh.workers] loader.newloader=swh.loader.package.newloader:register """, .. _swh-py-template: https://forge.softwareheritage.org/source/swh-py-template/ .. _entrypoint: https://setuptools.readthedocs.io/en/latest/userguide/entry_point.html Writing a minimal loader ------------------------ It is now time for the interesting part: writing the code to load packages from a package manager into the |swh| archive. Create a file named :file:`loader.py` in your package's directory, with two empty classes (replace the names with what you think is relevant):: from typing import Optional import attr from swh.loader.package.loader import BasePackageInfo, PackageLoader from swh.model.model import ObjectType, Person, Release, Sha1Git, TimestampWithTimezone @attr.s class NewPackageInfo(BasePackageInfo): pass class NewLoader(PackageLoader[NewPackageInfo]): visit_type = "newloader" We now have to fill some of the methods declared by :class:`swh.loader.package.PackageLoader` in your new ``NewLoader`` class.
Listing versions ++++++++++++++++ ``get_versions`` should return the list of names of all versions of the origin defined at ``self.url`` by the default constructor; and ``get_default_version`` should return the name of the default version (usually the latest stable release). They are both implemented with an API call to the package repository. For example, for PyPI origin https://pypi.org/project/requests, this is done with a request to https://pypi.org/pypi/requests/json. Getting package information +++++++++++++++++++++++++++ Next, ``get_package_info`` takes as argument a version name (as returned by ``get_versions``) and yields ``(branch_name, p_info)`` tuples, where ``branch_name`` is a string and ``p_info`` is an instance of the ``NewPackageInfo`` class we defined earlier. Each of these tuples should match a single file the loader will download from the origin. Usually, there is only one file per version, but this is not true for all package repositories (eg. CRAN and PyPI allow multiple version artifacts per version). As ``NewPackageInfo`` derives from :py:class:`swh.loader.package.BasePackageInfo`, it can be created like this:: return NewPackageInfo(url="https://...", filename="...-versionX.Y.tar.gz") The ``url`` must be a URL where to download the archive from. ``filename`` is optional, but it is nice to fill it when possible/relevant. The base ``PackageLoader`` will then take care of calling ``get_versions()`` to get all the versions, then call ``get_package_info()`` to get the list of archives to download, download them, and load all the directories in the archive. This means you do not need to manage downloads yourself; and we are now done with interactions with the package repository. Building a release +++++++++++++++++++ The final step for your minimal loader to work, is to implement ``build_release``. This is a very important part, as it will create a release object that will be inserted in |swh|, as a link between origins and the directories.
This function takes three important arguments: * ``p_info`` is an object returned by ``get_package_info()`` * ``uncompressed_path`` is the location on the disk where the base ``PackageLoader`` extracted the archive, so you can access files from the archive. * ``directory`` is an :term:`intrinsic identifier` of the directory that was loaded from the archive The way to implement it depends very much on how the package manager works, but here is a rough idea:: def build_release( self, p_info: NewPackageInfo, uncompressed_path: str, directory: Sha1Git ) -> Optional[Release]: author = Person(name="Jane Doe", email="jdoe@example.org") date = TimestampWithTimezone.from_iso8601("2021-04-01T11:55:20Z") return Release( name="v2.0.0", message="This is a new release of the project", author=author, date=date, target=directory, target_type=ObjectType.DIRECTORY, synthetic=True, ) The strings here are placeholders, and you should extract them from either the extracted archive (using ``uncompressed_path``), or from the package repository's API; see the :ref:`existing specifications ` for examples of values to use. The various classes used in this example are :py:class:`swh.model.model.Person`, :py:class:`swh.model.model.TimestampWithTimezone`, and :py:class:`swh.model.model.Release`. Note that you have access to the ``NewPackageInfo`` object created by ``get_package_info()``, so you can extend the ``NewPackageInfo`` class to pass data between these two functions. A few caveats: * Make sure the timezone matches the source's * ``Person`` can also be built with just a ``fullname``, if there aren't distinct fields for name and email. When in doubt, it's better to just write the ``fullname`` than try to parse it * ``author`` and ``committer`` (resp. ``date`` and ``committer_date``) may be different if the release was written and published by different people (resp. dates). This is only relevant when loading from VCS, so you can usually ignore it in your package loader.
Running your loader +++++++++++++++++++ .. _docker-run-loader-cli: With Docker ^^^^^^^^^^^ We recommend you use our `Docker environment`_ to test your loader. In short, install Docker, ``cd`` to ``swh-environment/docker/``, then `edit docker-compose.override.yml`_ to insert your new loader in the Docker environment, something like this will do:: version: '2' services: swh-loader-core: volumes: - "$HOME/swh-environment/swh-loader-core:/src/swh-loader-core" Then start the Docker environment:: docker-compose start Then, you can run your loader:: docker-compose exec swh-loader swh loader run newloader "https://example.org/~jdoe/project/" where ``newloader`` is the name you registered as an entrypoint in ``setup.py`` and ``https://example.org/~jdoe/project/`` is the origin URL, that will be set as the ``self.url`` attribute of your loader. For example, to run the PyPI loader, the command would be:: docker-compose exec swh-loader swh loader run pypi "https://pypi.org/project/requests/" If you get this error, make sure you properly configured ``docker-compose.override.yml``:: Error: Invalid value for '[...]': invalid choice: newloader Without Docker ^^^^^^^^^^^^^^ If you do not want to use the Docker environment, you will need to start an :ref:`swh-storage` instance yourself, and create a config file that references it:: storage: cls: remote url: http://localhost:5002/ Or alternatively, this more efficient configuration:: storage: cls: pipeline steps: - cls: buffer min_batch_size: content: 10000 content_bytes: 104857600 directory: 1000 release: 1000 - cls: filter - cls: remote url: http://localhost:5002/ And run your loader with:: swh loader -C loader.yml run newloader "https://example.org/~jdoe/project/" where ``newloader`` is the name you registered as an entrypoint in ``setup.py`` and ``https://example.org/~jdoe/project/`` is the origin URL, that will be set as the ``self.url`` attribute of your loader. 
For example, with PyPI:: swh loader -C loader.yml run pypi "https://pypi.org/project/requests/" .. _Docker environment: https://forge.softwareheritage.org/source/swh-environment/browse/master/docker/ .. _edit docker-compose.override.yml: https://forge.softwareheritage.org/source/swh-environment/browse/master/docker/#install-a-swh-package-from Testing your loader +++++++++++++++++++ You must write tests for your loader. First, of course, unit tests for the internal functions of your loader, if any (eg. the functions used to extract metadata); but this is not covered in this tutorial. Most importantly, you should write integration tests for your loader, that will simulate an origin, run the loader, and check everything is loaded in the storage as it should be. As we do not want tests to directly query an origin (it makes tests flaky, hard to reproduce, and puts unnecessary load on the origin), we usually mock it using the :py:func:`swh.core.pytest_plugin.requests_mock_datadir` fixture. It works by creating a ``data/`` folder in your tests (such as ``swh/loader/package/newloader/tests/data/``) and downloading results from API calls there, in the structure documented in :py:func:`swh.core.pytest_plugin.requests_mock_datadir_factory`. The files in the ``datadir/`` will then be served whenever the loader tries to access a URL. This is very dependent on the kind of repositories your loader will read from, so here is an example with the PyPI loader. The files ``swh/loader/package/pypi/tests/data/https_pypi.org/pypi_nexter_json`` and ``swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/nexter-*`` are used in this test:: from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats def test_pypi_visit_1_release_with_2_artifacts(swh_storage, requests_mock_datadir): # Initialize the loader url = "https://pypi.org/project/nexter" loader = PyPILoader(swh_storage, url) # Run the loader, with a swh-storage instance, on the given URL.
# HTTP calls will be mocked by the requests_mock_datadir fixture actual_load_status = loader.load() # Check the loader loaded exactly the snapshot we expected # (when writing your tests for the first time, you cannot know the # snapshot id without running your loader; so let it error and write # down the result here) expected_snapshot_id = hash_to_bytes("1394b2e59351a944cc763bd9d26d90ce8e8121a8") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), } # Check the content of the snapshot. (ditto) expected_snapshot = Snapshot( id=expected_snapshot_id, branches={ b"releases/1.1.0/nexter-1.1.0.zip": SnapshotBranch( target=hash_to_bytes("f7d43faeb65b64d3faa67e4f46559db57d26b9a4"), target_type=TargetType.RELEASE, ), b"releases/1.1.0/nexter-1.1.0.tar.gz": SnapshotBranch( target=hash_to_bytes("732bb9dc087e6015884daaebb8b82559be729b5a"), target_type=TargetType.RELEASE, ), }, ) check_snapshot(expected_snapshot, swh_storage) # Check the visit was properly created with the right type assert_last_visit_matches( swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot.id ) # Then you could check the directory structure: directory_id = swh_storage.release_get( [hash_to_bytes("f7d43faeb65b64d3faa67e4f46559db57d26b9a4")] )[0].target entries = list(swh_storage.directory_ls(directory_id, recursive=True)) assert entries == [ ... ] Here are some scenarios you should test, when relevant: * No versions * One version * Two or more versions * More than one package per version, if relevant * Corrupt packages (missing metadata, ...), if relevant * API errors * etc. Making your loader incremental ------------------------------ .. important:: In the previous sections, you wrote a fully functional loader for a new type of package repository. This is great! Please tell us about it, and :ref:`submit it for review ` so we can give you some feedback early. 
Now, we will see a key optimization for any package loader: skipping packages it already downloaded, using :term:`extids `. The rough idea is to find some way to uniquely identify packages before downloading them and encode it in a short string, the ExtID. Using checksums +++++++++++++++ Ideally, this short string is a checksum of the archive, provided by the API before downloading the archive itself. This is ideal, because this ensures that we detect changes in the package's content even if it keeps the same name and version number. If this is not the case of the repository you want to load from, skip to the next subsection. This is used for example by the PyPI loader (with a sha256sum) and the NPM loader (with a sha1sum). The Debian loader uses a similar scheme: as a single package is assembled from a set of tarballs, it only uses the hash of the ``.dsc`` file, which itself contains a hash of all the tarballs. This is implemented by overriding the ``extid`` method of your ``NewPackageInfo`` class, that returns the type of the ExtID (see below) and the ExtID itself:: from swh.loader.package.loader import PartialExtID EXTID_TYPE: str = "pypi-archive-sha256" @attr.s class NewPackageInfo(BasePackageInfo): sha256: str def extid(self) -> PartialExtID: return (EXTID_TYPE, hash_to_bytes(self.sha256)) and the loader's ``get_package_info`` method sets the right value in the ``sha256`` attribute. Using a custom manifest +++++++++++++++++++++++ -Unfortunaly, this does not work for all packages, as some package repositories do +Unfortunately, this does not work for all packages, as some package repositories do not provide a checksum of the archives via their API. If this is the case of the repository you want to load from, you need to find a way around it. It highly depends on the repository, so this tutorial cannot cover how to do it.
We do however provide an easy option that should work in most cases: creating a "manifest" of the archive with some metadata in it, and hashing it. For example, when loading from the GNU FTP servers, we have access to some metadata, that is somewhat good enough to deduplicate. We write them all in a string and hash that string. It is done like this:: import string @attr.s class ArchivePackageInfo(BasePackageInfo): length = attr.ib(type=int) """Size of the archive file""" time = attr.ib(type=Union[str, datetime.datetime]) """Timestamp of the archive file on the server""" version = attr.ib(type=str) EXTID_FORMAT = "package-manifest-sha256" MANIFEST_FORMAT = string.Template("$time $length $version $url") The default implementation of :py:func:`swh.loader.package.loader.BasePackageInfo.extid` will read this template, substitute the variables based on the object's attributes, compute the hash of the result, and return it. Note that, as mentioned before, this is not perfect because a tarball may be replaced with a different tarball of exactly the same length and modification time, and we won't detect it. But this is extremely unlikely, so we consider it to be good enough. Alternatively, if this is not good enough for your loader, you can simply not implement ExtIDs, and your loader will always load all tarballs. This can be bandwidth-heavy for both |swh| and the origin you are loading from, so this decision should not be taken lightly. Choosing the ExtID type +++++++++++++++++++++++ The type of your ExtID should be a short ASCII string, that is both unique to your loader and descriptive of how it was computed. Why unique to the loader? Because different loaders may load the same archive differently. For example, if I was to create an archive with both a ``PKG-INFO`` and a ``package.json`` file, and submit it to both NPM and PyPI, both package repositories would have exactly the same tarball.
But the NPM loader would create the release based on authorship info in ``package.json``, and the PyPI loader based on ``PKG-INFO``. But we do not want the PyPI loader to assume it already created a release itself, while the release was created by the NPM loader! And why descriptive? This is simply for future-proofing; in case your loader changes the format of the ExtID (eg. by using a different hash algorithm). Testing your incremental loading ++++++++++++++++++++++++++++++++ If you followed the steps above, your loader is now able to detect what packages it already downloaded and skip them. This is what we call an incremental loader. It is now time to write tests to make sure your loader fulfills this promise. This time, we want to use ``requests_mock_datadir_visits`` instead of ``requests_mock_datadir``, because we want to mock the repository's API to emulate its results changing over time (eg. because a new version was published between two runs of the loader). See the documentation of :py:func:`swh.core.pytest_plugin.requests_mock_datadir_factory` for a description of the file layout to use. 
Let's take, once again, a look at ``swh/loader/package/pypi/tests/test_pypi.py``, to use as an example:: def test_pypi_incremental_visit(swh_storage, requests_mock_datadir_visits): """With prior visit, 2nd load will result with a different snapshot """ # Initialize the loader url = "https://pypi.org/project/0805nexter" loader = PyPILoader(swh_storage, url) # First visit visit1_actual_load_status = loader.load() visit1_stats = get_stats(swh_storage) # Make sure everything is in order expected_snapshot_id = hash_to_bytes("ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a") assert visit1_actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), } assert_last_visit_matches( swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot_id ) assert { "content": 6, "directory": 4, "origin": 1, "origin_visit": 1, "release": 2, "revision": 0, "skipped_content": 0, "snapshot": 1, } == visit1_stats # Reset internal state del loader._cached__raw_info del loader._cached_info # Second visit visit2_actual_load_status = loader.load() visit2_stats = get_stats(swh_storage) # Check the result of the visit assert visit2_actual_load_status["status"] == "eventful", visit2_actual_load_status expected_snapshot_id2 = hash_to_bytes("2e5149a7b0725d18231a37b342e9b7c4e121f283") assert visit2_actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id2.hex(), } assert_last_visit_matches( swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot_id2 ) assert { "content": 6 + 1, # 1 more content "directory": 4 + 2, # 2 more directories "origin": 1, "origin_visit": 1 + 1, "release": 2 + 1, # 1 more release "revision": 0, "skipped_content": 0, "snapshot": 1 + 1, # 1 more snapshot } == visit2_stats # Check all content objects were loaded expected_contents = map( hash_to_bytes, [ "a61e24cdfdab3bb7817f6be85d37a3e666b34566", "938c33483285fd8ad57f15497f538320df82aeb8", "a27576d60e08c94a05006d2e6d540c0fdb5f38c8", 
"405859113963cb7a797642b45f171d6360425d16", "e5686aa568fdb1d19d7f1329267082fe40482d31", "83ecf6ec1114fd260ca7a833a2d165e71258c338", "92689fa2b7fb4d4fc6fb195bf73a50c87c030639", ], ) assert list(swh_storage.content_missing_per_sha1(expected_contents)) == [] # Check all directory objects were loaded expected_dirs = map( hash_to_bytes, [ "05219ba38bc542d4345d5638af1ed56c7d43ca7d", "cf019eb456cf6f78d8c4674596f1c9a97ece8f44", "b178b66bd22383d5f16f4f5c923d39ca798861b4", "c3a58f8b57433a4b56caaa5033ae2e0931405338", "e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a", "52604d46843b898f5a43208045d09fcf8731631b", ], ) assert list(swh_storage.directory_missing(expected_dirs)) == [] # etc. Loading metadata ---------------- Finally, an optional step: collecting and loading :term:`extrinsic metadata`. This is metadata that your loader may collect while loading an origin. For example, the PyPI loader collects some parts of the API response (eg. https://pypi.org/pypi/requests/json) They are stored as raw bytestring, along with a format (an ASCII string) and a date of discovery (usually the time your loader ran). This is done by adding them to the ``directory_extrinsic_metadata`` attribute of your ``NewPackageInfo`` object when creating it in ``get_package_info`` as :class:`swh.loader.package.loader.RawExtrinsicMetadataCore` objects:: NewPackageInfo( ..., directory_extrinsic_metadata=[ RawExtrinsicMetadataCore( format="new-format", metadata=b"foo bar baz", discovery_date=datetime.datetime(...), ) ] ) ``format`` should be a human-readable ASCII string that unambiguously describes the format. Readers of the metadata object will have a built-in list of formats they understand, and will check if your metadata object is among them. You should use one of the :ref:`known metadata formats ` if possible, or add yours to this list. ``metadata`` is the metadata object itself. When possible, it should be copied verbatim from the source object you got, and should not be created by the loader. 
If this is not possible, for example because it is extracted from a larger JSON or XML document, make sure you do as few modifications as possible to reduce the risks of corruption. ``discovery_date`` is optional, and defaults to the time your loader started working. In theory, you can write extrinsic metadata on any kind of object, eg. by implementing :py:meth:`swh.loader.package.loader.PackageLoader.get_extrinsic_origin_metadata`, :py:meth:`swh.loader.package.loader.PackageLoader.get_extrinsic_snapshot_metadata`; but this is rarely relevant in practice. Be sure to check if your loader can find any potentially interesting metadata, though! You also need to implement a new method on your loader class, to return information on where the metadata is coming from, called a metadata authority. This authority is identified by a URI, such as ``https://github.com/`` for GitHub, ``https://pypi.org/`` for PyPI, etc. For example:: from swh.model.model import MetadataAuthority, MetadataAuthorityType def get_metadata_authority(self): return MetadataAuthority( type=MetadataAuthorityType.FORGE, url="https://pypi.org/", ) If your loader supports loading from different instances (like GitLab), you can define the authority dynamically based on the URL of the origin:: def get_metadata_authority(self): p_url = urlparse(self.url) return MetadataAuthority( type=MetadataAuthorityType.FORGE, url=f"{p_url.scheme}://{p_url.netloc}/", ) Checklist --------- Before the final addition of a new loader, here is a list of things to check for. Most of them are a reminder of other sections above. * There is (or will be) a lister to trigger it * Tested with pytest, from scratch and incrementally (if relevant) * Tested in Docker, from scratch and incrementally (if relevant) * Release fields are consistent with the :ref:`existing specifications `, and you updated the specifications to add your loader. They must be explicitly tested.
* Relevant metadata are loaded with as little processing as possible (ie. keep the original format unchanged, instead of converting it to a JSON/msgpack/... format) and :ref:`their format is documented `. They must be tested as well. * There is no risk of extid clashes, even across instances (if relevant), even in the presence of malicious actors (as far as reasonably possible) Final words ----------- Congratulations, you made it to the end. If you have not already, please `contact us`_ to tell us about your new loader, and :ref:`submit your loader for review ` on our forge so we can merge it and run it along our other loaders to archive more repositories. And if you have any changes in mind to improve this tutorial for future readers, please submit them too. Thank you for your contributions! .. _contact us: https://www.softwareheritage.org/community/developers/ diff --git a/swh.loader.core.egg-info/PKG-INFO b/swh.loader.core.egg-info/PKG-INFO index 1c6d837..3389dbe 100644 --- a/swh.loader.core.egg-info/PKG-INFO +++ b/swh.loader.core.egg-info/PKG-INFO @@ -1,56 +1,56 @@ Metadata-Version: 2.1 Name: swh.loader.core -Version: 2.5.0 +Version: 2.5.4 Summary: Software Heritage Base Loader Home-page: https://forge.softwareheritage.org/diffusion/DLDBASE Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-loader-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-loader-core/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown 
Provides-Extra: testing License-File: LICENSE License-File: AUTHORS Software Heritage - Loader foundations ====================================== The Software Heritage Loader Core is a low-level loading utilities and helpers used by :term:`loaders `. The main entry points are classes: - :class:`swh.loader.core.loader.BaseLoader` for loaders (e.g. svn) - :class:`swh.loader.core.loader.DVCSLoader` for DVCS loaders (e.g. hg, git, ...) - :class:`swh.loader.package.loader.PackageLoader` for Package loaders (e.g. PyPI, Npm, ...) Package loaders --------------- This package also implements many package loaders directly, out of convenience, as they usually are quite similar and each fits in a single file. They all roughly follow these steps, explained in the :py:meth:`swh.loader.package.loader.PackageLoader.load` documentation. See the :ref:`package-loader-tutorial` for details. VCS loaders ----------- Unlike package loaders, VCS loaders remain in separate packages, as they often need more advanced conversions and very VCS-specific operations. 
This usually involves getting the branches of a repository and recursively loading revisions in the history (and directory trees in these revisions), until a known revision is found diff --git a/swh.loader.core.egg-info/SOURCES.txt b/swh.loader.core.egg-info/SOURCES.txt index 50986f6..dbf32c7 100644 --- a/swh.loader.core.egg-info/SOURCES.txt +++ b/swh.loader.core.egg-info/SOURCES.txt @@ -1,214 +1,216 @@ .gitignore .pre-commit-config.yaml AUTHORS CODE_OF_CONDUCT.md CONTRIBUTORS LICENSE MANIFEST.in Makefile README.rst conftest.py mypy.ini pyproject.toml pytest.ini requirements-swh.txt requirements-test.txt requirements.txt setup.cfg setup.py tox.ini docs/.gitignore docs/Makefile docs/README.rst docs/cli.rst docs/conf.py docs/index.rst docs/package-loader-specifications.rst docs/package-loader-tutorial.rst docs/vcs-loader-overview.rst docs/_static/.placeholder docs/_templates/.placeholder swh/__init__.py swh.loader.core.egg-info/PKG-INFO swh.loader.core.egg-info/SOURCES.txt swh.loader.core.egg-info/dependency_links.txt swh.loader.core.egg-info/entry_points.txt swh.loader.core.egg-info/requires.txt swh.loader.core.egg-info/top_level.txt swh/loader/__init__.py swh/loader/cli.py swh/loader/exception.py swh/loader/pytest_plugin.py swh/loader/core/__init__.py swh/loader/core/converters.py swh/loader/core/loader.py swh/loader/core/py.typed swh/loader/core/utils.py swh/loader/core/tests/__init__.py swh/loader/core/tests/test_converters.py swh/loader/core/tests/test_loader.py swh/loader/core/tests/test_utils.py swh/loader/package/__init__.py swh/loader/package/loader.py swh/loader/package/py.typed swh/loader/package/utils.py swh/loader/package/archive/__init__.py swh/loader/package/archive/loader.py swh/loader/package/archive/tasks.py swh/loader/package/archive/tests/__init__.py swh/loader/package/archive/tests/test_archive.py swh/loader/package/archive/tests/test_tasks.py swh/loader/package/archive/tests/data/not_gzipped_tarball.tar.gz 
swh/loader/package/archive/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz swh/loader/package/archive/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz_visit1 swh/loader/package/archive/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz_visit2 swh/loader/package/archive/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.2.0.tar.gz swh/loader/package/cran/__init__.py swh/loader/package/cran/loader.py swh/loader/package/cran/tasks.py swh/loader/package/cran/tests/__init__.py swh/loader/package/cran/tests/test_cran.py swh/loader/package/cran/tests/test_tasks.py swh/loader/package/cran/tests/data/description/KnownBR swh/loader/package/cran/tests/data/description/acepack swh/loader/package/cran/tests/data/https_cran.r-project.org/src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz swh/loader/package/debian/__init__.py swh/loader/package/debian/loader.py swh/loader/package/debian/tasks.py swh/loader/package/debian/tests/__init__.py swh/loader/package/debian/tests/test_debian.py swh/loader/package/debian/tests/test_tasks.py swh/loader/package/debian/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-3.diff.gz swh/loader/package/debian/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-3.dsc swh/loader/package/debian/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-4.diff.gz swh/loader/package/debian/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2-4.dsc swh/loader/package/debian/tests/data/http_deb.debian.org/debian_pool_contrib_c_cicero_cicero_0.7.2.orig.tar.gz swh/loader/package/debian/tests/data/http_deb.debian.org/onefile.txt swh/loader/package/deposit/__init__.py swh/loader/package/deposit/loader.py swh/loader/package/deposit/tasks.py swh/loader/package/deposit/tests/__init__.py swh/loader/package/deposit/tests/conftest.py swh/loader/package/deposit/tests/test_deposit.py swh/loader/package/deposit/tests/test_tasks.py 
swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_666_meta swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_666_raw swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_777_meta swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_777_raw swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_888_meta swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_888_raw swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_999_meta swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/1_private_999_raw swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello-2.10.zip swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello-2.12.tar.gz swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello_2.10.json swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello_2.11.json swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello_2.12.json swh/loader/package/deposit/tests/data/https_deposit.softwareheritage.org/hello_2.13.json swh/loader/package/maven/__init__.py swh/loader/package/maven/loader.py swh/loader/package/maven/tasks.py swh/loader/package/maven/tests/__init__.py swh/loader/package/maven/tests/test_maven.py swh/loader/package/maven/tests/test_tasks.py swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.0-sources.jar swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.0.pom swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.1-sources.jar swh/loader/package/maven/tests/data/https_maven.org/sprova4j-0.1.1.pom swh/loader/package/nixguix/__init__.py swh/loader/package/nixguix/loader.py swh/loader/package/nixguix/tasks.py swh/loader/package/nixguix/tests/__init__.py 
swh/loader/package/nixguix/tests/conftest.py swh/loader/package/nixguix/tests/test_nixguix.py swh/loader/package/nixguix/tests/test_tasks.py swh/loader/package/nixguix/tests/data/https_example.com/file.txt swh/loader/package/nixguix/tests/data/https_fail.com/truncated-archive.tgz swh/loader/package/nixguix/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz swh/loader/package/nixguix/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz_visit1 swh/loader/package/nixguix/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz_visit2 swh/loader/package/nixguix/tests/data/https_ftp.gnu.org/gnu_8sync_8sync-0.2.0.tar.gz swh/loader/package/nixguix/tests/data/https_github.com/owner-1_repository-1_revision-1.tgz swh/loader/package/nixguix/tests/data/https_github.com/owner-2_repository-1_revision-1.tgz swh/loader/package/nixguix/tests/data/https_github.com/owner-3_repository-1_revision-1.tgz swh/loader/package/nixguix/tests/data/https_nix-community.github.io/nixpkgs-swh_sources-EOFError.json swh/loader/package/nixguix/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json swh/loader/package/nixguix/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json_visit1 swh/loader/package/nixguix/tests/data/https_nix-community.github.io/nixpkgs-swh_sources_special.json swh/loader/package/nixguix/tests/data/https_nix-community.github.io/nixpkgs-swh_sources_special.json_visit1 swh/loader/package/npm/__init__.py swh/loader/package/npm/loader.py swh/loader/package/npm/tasks.py swh/loader/package/npm/tests/__init__.py swh/loader/package/npm/tests/test_npm.py swh/loader/package/npm/tests/test_tasks.py swh/loader/package/npm/tests/data/https_registry.npmjs.org/@aller_shared_-_shared-0.1.0.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/@aller_shared_-_shared-0.1.1-alpha.14.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/jammit-express_-_jammit-express-0.0.1.tgz 
swh/loader/package/npm/tests/data/https_registry.npmjs.org/nativescript-telerik-analytics_-_nativescript-telerik-analytics-1.0.0.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.0.2.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.0.3.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.0.4.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.0.5.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.1.0.tgz swh/loader/package/npm/tests/data/https_registry.npmjs.org/org_-_org-0.2.0.tgz swh/loader/package/npm/tests/data/https_replicate.npmjs.com/@aller_shared swh/loader/package/npm/tests/data/https_replicate.npmjs.com/catify swh/loader/package/npm/tests/data/https_replicate.npmjs.com/jammit-express swh/loader/package/npm/tests/data/https_replicate.npmjs.com/jammit-no-time swh/loader/package/npm/tests/data/https_replicate.npmjs.com/nativescript-telerik-analytics swh/loader/package/npm/tests/data/https_replicate.npmjs.com/org swh/loader/package/npm/tests/data/https_replicate.npmjs.com/org_visit1 swh/loader/package/opam/__init__.py swh/loader/package/opam/loader.py swh/loader/package/opam/tasks.py swh/loader/package/opam/tests/__init__.py swh/loader/package/opam/tests/test_opam.py swh/loader/package/opam/tests/test_tasks.py -swh/loader/package/opam/tests/data/fake_opam_repo/repo +swh/loader/package/opam/tests/data/fake_opam_repo/_repo swh/loader/package/opam/tests/data/fake_opam_repo/version -swh/loader/package/opam/tests/data/fake_opam_repo/packages/agrid/agrid.0.1/opam -swh/loader/package/opam/tests/data/fake_opam_repo/packages/directories/directories.0.1/opam -swh/loader/package/opam/tests/data/fake_opam_repo/packages/directories/directories.0.2/opam -swh/loader/package/opam/tests/data/fake_opam_repo/packages/directories/directories.0.3/opam -swh/loader/package/opam/tests/data/fake_opam_repo/packages/ocb/ocb.0.1/opam 
+swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/lock +swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/repos-config +swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/agrid/agrid.0.1/opam +swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/directories/directories.0.1/opam +swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/directories/directories.0.2/opam +swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/directories/directories.0.3/opam +swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/ocb/ocb.0.1/opam swh/loader/package/opam/tests/data/https_github.com/OCamlPro_agrid_archive_0.1.tar.gz swh/loader/package/opam/tests/data/https_github.com/OCamlPro_directories_archive_0.1.tar.gz swh/loader/package/opam/tests/data/https_github.com/OCamlPro_directories_archive_0.2.tar.gz swh/loader/package/opam/tests/data/https_github.com/OCamlPro_directories_archive_0.3.tar.gz swh/loader/package/opam/tests/data/https_github.com/OCamlPro_ocb_archive_0.1.tar.gz swh/loader/package/pypi/__init__.py swh/loader/package/pypi/loader.py swh/loader/package/pypi/tasks.py swh/loader/package/pypi/tests/__init__.py swh/loader/package/pypi/tests/test_pypi.py swh/loader/package/pypi/tests/test_tasks.py swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/0805nexter-1.1.0.tar.gz swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/0805nexter-1.1.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/0805nexter-1.2.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/0805nexter-1.3.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/0805nexter-1.4.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/nexter-1.1.0.tar.gz swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/nexter-1.1.0.zip 
swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/packages_70_97_c49fb8ec24a7aaab54c3dbfbb5a6ca1431419d9ee0f6c363d9ad01d2b8b1_0805nexter-1.3.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/packages_86_10_c9555ec63106153aaaad753a281ff47f4ac79e980ff7f5d740d6649cd56a_upymenu-0.0.1.tar.gz swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/packages_c4_a0_4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4_0805nexter-1.2.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/packages_c4_a0_4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4_0805nexter-1.2.0.zip_visit1 swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/packages_ec_65_c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d_0805nexter-1.1.0.zip swh/loader/package/pypi/tests/data/https_files.pythonhosted.org/packages_ec_65_c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d_0805nexter-1.1.0.zip_visit1 swh/loader/package/pypi/tests/data/https_pypi.org/pypi_0805nexter_json swh/loader/package/pypi/tests/data/https_pypi.org/pypi_0805nexter_json_visit1 swh/loader/package/pypi/tests/data/https_pypi.org/pypi_nexter_json swh/loader/package/pypi/tests/data/https_pypi.org/pypi_upymenu_json swh/loader/package/tests/__init__.py swh/loader/package/tests/common.py swh/loader/package/tests/test_conftest.py swh/loader/package/tests/test_loader.py swh/loader/package/tests/test_loader_metadata.py swh/loader/package/tests/test_utils.py swh/loader/tests/__init__.py swh/loader/tests/conftest.py swh/loader/tests/py.typed swh/loader/tests/test_cli.py swh/loader/tests/test_init.py swh/loader/tests/data/0805nexter-1.1.0.tar.gz \ No newline at end of file diff --git a/swh.loader.core.egg-info/entry_points.txt b/swh.loader.core.egg-info/entry_points.txt index 8be3745..442b094 100644 --- a/swh.loader.core.egg-info/entry_points.txt +++ b/swh.loader.core.egg-info/entry_points.txt @@ -1,14 +1,13 @@ +[swh.cli.subcommands] +loader = 
swh.loader.cli - [swh.cli.subcommands] - loader=swh.loader.cli - [swh.workers] - loader.archive=swh.loader.package.archive:register - loader.cran=swh.loader.package.cran:register - loader.debian=swh.loader.package.debian:register - loader.deposit=swh.loader.package.deposit:register - loader.nixguix=swh.loader.package.nixguix:register - loader.npm=swh.loader.package.npm:register - loader.opam=swh.loader.package.opam:register - loader.pypi=swh.loader.package.pypi:register - loader.maven=swh.loader.package.maven:register - \ No newline at end of file +[swh.workers] +loader.archive = swh.loader.package.archive:register +loader.cran = swh.loader.package.cran:register +loader.debian = swh.loader.package.debian:register +loader.deposit = swh.loader.package.deposit:register +loader.maven = swh.loader.package.maven:register +loader.nixguix = swh.loader.package.nixguix:register +loader.npm = swh.loader.package.npm:register +loader.opam = swh.loader.package.opam:register +loader.pypi = swh.loader.package.pypi:register diff --git a/swh/loader/cli.py b/swh/loader/cli.py index f23f7ff..53c4a11 100644 --- a/swh/loader/cli.py +++ b/swh/loader/cli.py @@ -1,130 +1,134 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # WARNING: do not import unnecessary things here to keep cli startup time under # control import logging from typing import Any import click import pkg_resources from swh.core.cli import CONTEXT_SETTINGS from swh.core.cli import swh as swh_cli_group logger = logging.getLogger(__name__) LOADERS = { entry_point.name.split(".", 1)[1]: entry_point for entry_point in pkg_resources.iter_entry_points("swh.workers") if entry_point.name.split(".", 1)[0] == "loader" } SUPPORTED_LOADERS = sorted(list(LOADERS)) def get_loader(name: str, **kwargs) -> Any: """Given a loader name, 
instantiate it. Args: name: Loader's name kwargs: Configuration dict (url...) Returns: An instantiated loader """ if name not in LOADERS: raise ValueError( "Invalid loader %s: only supported loaders are %s" % (name, SUPPORTED_LOADERS) ) registry_entry = LOADERS[name].load()() logger.debug(f"registry: {registry_entry}") loader_cls = registry_entry["loader"] logger.debug(f"loader class: {loader_cls}") return loader_cls.from_config(**kwargs) @swh_cli_group.group(name="loader", context_settings=CONTEXT_SETTINGS) @click.option( "--config-file", "-C", default=None, type=click.Path(exists=True, dir_okay=False,), help="Configuration file.", ) @click.pass_context def loader(ctx, config_file): """Loader cli tools """ from os import environ from swh.core.config import read ctx.ensure_object(dict) logger.debug("ctx: %s", ctx) if not config_file: config_file = environ.get("SWH_CONFIG_FILENAME") ctx.obj["config"] = read(config_file) logger.debug("config_file: %s", config_file) logger.debug("config: ", ctx.obj["config"]) @loader.command(name="run", context_settings=CONTEXT_SETTINGS) @click.argument("type", type=click.Choice(SUPPORTED_LOADERS)) @click.argument("url") @click.argument("options", nargs=-1) @click.pass_context def run(ctx, type, url, options): """Ingest with loader the origin located at """ import iso8601 from swh.scheduler.cli.utils import parse_options conf = ctx.obj.get("config", {}) if "storage" not in conf: raise ValueError("Missing storage configuration key") (_, kw) = parse_options(options) logger.debug(f"kw: {kw}") visit_date = kw.get("visit_date") if visit_date and isinstance(visit_date, str): visit_date = iso8601.parse_date(visit_date) kw["visit_date"] = visit_date loader = get_loader(type, url=url, storage=conf["storage"], **kw) result = loader.load() - click.echo(result) + msg = f"{result} for origin '{url}'" + directory = kw.get("directory") + if directory: + msg = msg + f" and directory '{directory}'" + click.echo(msg) @loader.command(name="list", 
context_settings=CONTEXT_SETTINGS) @click.argument("type", default="all", type=click.Choice(["all"] + SUPPORTED_LOADERS)) @click.pass_context def list(ctx, type): """List supported loaders and optionally their arguments""" import inspect if type == "all": loaders = ", ".join(SUPPORTED_LOADERS) click.echo(f"Supported loaders: {loaders}") else: registry_entry = LOADERS[type].load()() loader_cls = registry_entry["loader"] doc = inspect.getdoc(loader_cls).strip() # Hack to get the signature of the class even though it subclasses # Generic, which reimplements __new__. # See signature = inspect.signature(loader_cls.__init__) signature_str = str(signature).replace("self, ", "") click.echo(f"Loader: {doc}\nsignature: {signature_str}") diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py index 229c577..3876e39 100644 --- a/swh/loader/package/deposit/loader.py +++ b/swh/loader/package/deposit/loader.py @@ -1,394 +1,384 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime from datetime import timezone import json import logging from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple, Union import attr import requests from swh.core.config import load_from_envvar from swh.loader.core.loader import DEFAULT_CONFIG from swh.loader.package.loader import ( BasePackageInfo, PackageLoader, RawExtrinsicMetadataCore, ) from swh.loader.package.utils import cached_method, download from swh.model.hashutil import hash_to_bytes, hash_to_hex from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, ObjectType, Person, Release, Sha1Git, TimestampWithTimezone, ) from swh.storage.algos.snapshot import snapshot_get_all_branches from swh.storage.interface import StorageInterface logger = 
logging.getLogger(__name__) def now() -> datetime.datetime: return datetime.datetime.now(tz=timezone.utc) @attr.s class DepositPackageInfo(BasePackageInfo): filename = attr.ib(type=str) # instead of Optional[str] - raw_info = attr.ib(type=Dict[str, Any]) author_date = attr.ib(type=datetime.datetime) """codemeta:dateCreated if any, deposit completed_date otherwise""" commit_date = attr.ib(type=datetime.datetime) """codemeta:datePublished if any, deposit completed_date otherwise""" client = attr.ib(type=str) id = attr.ib(type=int) """Internal ID of the deposit in the deposit DB""" collection = attr.ib(type=str) """The collection in the deposit; see SWORD specification.""" author = attr.ib(type=Person) committer = attr.ib(type=Person) release_notes = attr.ib(type=Optional[str]) @classmethod def from_metadata( cls, metadata: Dict[str, Any], url: str, filename: str, version: str ) -> "DepositPackageInfo": # Note: # `date` and `committer_date` are always transmitted by the deposit read api # which computes itself the values. The loader needs to use those to create the # release. all_metadata_raw: List[str] = metadata["metadata_raw"] - raw_info = { - "origin": metadata["origin"], - "origin_metadata": { - "metadata": metadata["metadata_dict"], - "provider": metadata["provider"], - "tool": metadata["tool"], - }, - } depo = metadata["deposit"] return cls( url=url, filename=filename, version=version, author_date=depo["author_date"], commit_date=depo["committer_date"], client=depo["client"], id=depo["id"], collection=depo["collection"], author=parse_author(depo["author"]), committer=parse_author(depo["committer"]), release_notes=depo["release_notes"], - raw_info=raw_info, directory_extrinsic_metadata=[ RawExtrinsicMetadataCore( discovery_date=now(), metadata=raw_metadata.encode(), format="sword-v2-atom-codemeta-v2", ) for raw_metadata in all_metadata_raw ], ) def extid(self) -> None: # For now, we don't try to deduplicate deposits. 
There is little point anyway, # as it only happens when the exact same tarball was deposited twice. return None class DepositLoader(PackageLoader[DepositPackageInfo]): """Load a deposited artifact into swh archive. """ visit_type = "deposit" def __init__( self, storage: StorageInterface, url: str, deposit_id: str, deposit_client: "ApiClient", max_content_size: Optional[int] = None, default_filename: str = "archive.tar", ): """Constructor Args: url: Origin url to associate the artifacts/metadata to deposit_id: Deposit identity deposit_client: Deposit api client """ super().__init__(storage=storage, url=url, max_content_size=max_content_size) self.deposit_id = deposit_id self.client = deposit_client self.default_filename = default_filename @classmethod def from_configfile(cls, **kwargs: Any): """Instantiate a loader from the configuration loaded from the SWH_CONFIG_FILENAME envvar, with potential extra keyword arguments if their value is not None. Args: kwargs: kwargs passed to the loader instantiation """ config = dict(load_from_envvar(DEFAULT_CONFIG)) config.update({k: v for k, v in kwargs.items() if v is not None}) deposit_client = ApiClient(**config.pop("deposit")) return cls.from_config(deposit_client=deposit_client, **config) def get_versions(self) -> Sequence[str]: # only 1 branch 'HEAD' with no alias since we only have 1 snapshot # branch return ["HEAD"] def get_metadata_authority(self) -> MetadataAuthority: provider = self.metadata()["provider"] assert provider["provider_type"] == MetadataAuthorityType.DEPOSIT_CLIENT.value return MetadataAuthority( type=MetadataAuthorityType.DEPOSIT_CLIENT, url=provider["provider_url"], metadata={ "name": provider["provider_name"], **(provider["metadata"] or {}), }, ) def get_metadata_fetcher(self) -> MetadataFetcher: tool = self.metadata()["tool"] return MetadataFetcher( name=tool["name"], version=tool["version"], metadata=tool["configuration"], ) def get_package_info( self, version: str ) -> Iterator[Tuple[str, 
DepositPackageInfo]]: p_info = DepositPackageInfo.from_metadata( self.metadata(), url=self.url, filename=self.default_filename, version=version, ) yield "HEAD", p_info def download_package( self, p_info: DepositPackageInfo, tmpdir: str ) -> List[Tuple[str, Mapping]]: """Override to allow use of the dedicated deposit client """ return [self.client.archive_get(self.deposit_id, tmpdir, p_info.filename)] def build_release( self, p_info: DepositPackageInfo, uncompressed_path: str, directory: Sha1Git, ) -> Optional[Release]: message = ( f"{p_info.client}: Deposit {p_info.id} in collection {p_info.collection}" ) if p_info.release_notes: message += "\n\n" + p_info.release_notes if not message.endswith("\n"): message += "\n" return Release( name=p_info.version.encode(), message=message.encode(), author=p_info.author, date=TimestampWithTimezone.from_dict(p_info.author_date), target=directory, target_type=ObjectType.DIRECTORY, synthetic=True, ) def get_extrinsic_origin_metadata(self) -> List[RawExtrinsicMetadataCore]: metadata = self.metadata() all_metadata_raw: List[str] = metadata["metadata_raw"] origin_metadata = json.dumps( { "metadata": all_metadata_raw, "provider": metadata["provider"], "tool": metadata["tool"], } ).encode() return [ RawExtrinsicMetadataCore( discovery_date=now(), metadata=raw_meta.encode(), format="sword-v2-atom-codemeta-v2", ) for raw_meta in all_metadata_raw ] + [ RawExtrinsicMetadataCore( discovery_date=now(), metadata=origin_metadata, format="original-artifacts-json", ) ] @cached_method def metadata(self): """Returns metadata from the deposit server""" return self.client.metadata_get(self.deposit_id) def load(self) -> Dict: # First making sure the deposit is known on the deposit's RPC server # prior to trigger a loading try: self.metadata() except ValueError: logger.error(f"Unknown deposit {self.deposit_id}, ignoring") return {"status": "failed"} # Then usual loading return super().load() def finalize_visit( self, status_visit: str, errors: 
Optional[List[str]] = None, **kwargs ) -> Dict[str, Any]: r = super().finalize_visit(status_visit=status_visit, **kwargs) success = status_visit == "full" # Update deposit status try: if not success: self.client.status_update( self.deposit_id, status="failed", errors=errors, ) return r snapshot_id = hash_to_bytes(r["snapshot_id"]) snapshot = snapshot_get_all_branches(self.storage, snapshot_id) if not snapshot: return r branches = snapshot.branches logger.debug("branches: %s", branches) if not branches: return r rel_id = branches[b"HEAD"].target release = self.storage.release_get([rel_id])[0] if not release: return r # update the deposit's status to success with its # release-id and directory-id self.client.status_update( self.deposit_id, status="done", release_id=hash_to_hex(rel_id), directory_id=hash_to_hex(release.target), snapshot_id=r["snapshot_id"], origin_url=self.url, ) except Exception: logger.exception("Problem when trying to update the deposit's status") return {"status": "failed"} return r def parse_author(author) -> Person: """See prior fixme """ return Person( fullname=author["fullname"].encode("utf-8"), name=author["name"].encode("utf-8"), email=author["email"].encode("utf-8"), ) class ApiClient: """Private Deposit Api client """ def __init__(self, url, auth: Optional[Mapping[str, str]]): self.base_url = url.rstrip("/") self.auth = None if not auth else (auth["username"], auth["password"]) def do(self, method: str, url: str, *args, **kwargs): """Internal method to deal with requests, possibly with basic http authentication. 
Args: method (str): supported http methods as in get/post/put Returns: The request's execution output """ method_fn = getattr(requests, method) if self.auth: kwargs["auth"] = self.auth return method_fn(url, *args, **kwargs) def archive_get( self, deposit_id: Union[int, str], tmpdir: str, filename: str ) -> Tuple[str, Dict]: """Retrieve deposit's archive artifact locally """ url = f"{self.base_url}/{deposit_id}/raw/" return download(url, dest=tmpdir, filename=filename, auth=self.auth) def metadata_url(self, deposit_id: Union[int, str]) -> str: return f"{self.base_url}/{deposit_id}/meta/" def metadata_get(self, deposit_id: Union[int, str]) -> Dict[str, Any]: """Retrieve deposit's metadata artifact as json """ url = self.metadata_url(deposit_id) r = self.do("get", url) if r.ok: return r.json() msg = f"Problem when retrieving deposit metadata at {url}" logger.error(msg) raise ValueError(msg) def status_update( self, deposit_id: Union[int, str], status: str, errors: Optional[List[str]] = None, release_id: Optional[str] = None, directory_id: Optional[str] = None, snapshot_id: Optional[str] = None, origin_url: Optional[str] = None, ): """Update deposit's information including status, and persistent identifiers result of the loading. 
""" url = f"{self.base_url}/{deposit_id}/update/" payload: Dict[str, Any] = {"status": status} if release_id: payload["release_id"] = release_id if directory_id: payload["directory_id"] = directory_id if snapshot_id: payload["snapshot_id"] = snapshot_id if origin_url: payload["origin_url"] = origin_url if errors: payload["status_detail"] = {"loading": errors} self.do("put", url, json=payload) diff --git a/swh/loader/package/maven/tasks.py b/swh/loader/package/maven/tasks.py index 5be462d..49d2b0b 100644 --- a/swh/loader/package/maven/tasks.py +++ b/swh/loader/package/maven/tasks.py @@ -1,15 +1,15 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from celery import shared_task from swh.loader.package.maven.loader import MavenLoader @shared_task(name=__name__ + ".LoadMaven") -def load_jar_file(*, url=None, artifacts=None): - """Load jar's artifacts.""" +def load_maven(*, url=None, artifacts=None): + """Load maven jar artifacts.""" loader = MavenLoader.from_configfile(url=url, artifacts=artifacts) return loader.load() diff --git a/swh/loader/package/opam/loader.py b/swh/loader/package/opam/loader.py index d2a688f..a2bb808 100644 --- a/swh/loader/package/opam/loader.py +++ b/swh/loader/package/opam/loader.py @@ -1,259 +1,261 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import io import os from subprocess import PIPE, Popen, call from typing import Iterator, List, Optional, Tuple import attr from swh.loader.package.loader import ( BasePackageInfo, PackageLoader, RawExtrinsicMetadataCore, ) from 
swh.loader.package.utils import cached_method from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, ObjectType, Person, Release, Sha1Git, ) from swh.storage.interface import StorageInterface @attr.s class OpamPackageInfo(BasePackageInfo): author = attr.ib(type=Person) committer = attr.ib(type=Person) def opam_read( cmd: List[str], init_error_msg_if_any: Optional[str] = None ) -> Optional[str]: """This executes an opam command and returns the first line of the output. Args: cmd: Opam command to execute as a list of string init_error_msg_if_any: Error message to raise in case a problem occurs during initialization Raises: ValueError with the init_error_msg_if_any content in case stdout is not consumable and the variable is provided with non empty value. Returns: the first line of the executed command output """ with Popen(cmd, stdout=PIPE) as proc: if proc.stdout is not None: for line in io.TextIOWrapper(proc.stdout): # care only for the first line output result (mostly blank separated # values, callers will deal with the parsing of the line) return line elif init_error_msg_if_any: raise ValueError(init_error_msg_if_any) return None class OpamLoader(PackageLoader[OpamPackageInfo]): """Load all versions of a given package in a given opam repository. The state of the opam repository is stored in a directory called an opam root. This folder is a requisite for the opam binary to actually list information on package. When initialize_opam_root is False (the default for production workers), the opam root must already have been configured outside of the loading process. If not an error is raised, thus failing the loading. For standalone workers, initialize_opam_root must be set to True, so the ingestion can take care of installing the required opam root properly. The remaining ingestion uses the opam binary to give the versions of the given package. Then, for each version, the loader uses the opam binary to list the tarball url to fetch and ingest. 
""" visit_type = "opam" def __init__( self, storage: StorageInterface, url: str, opam_root: str, opam_instance: str, opam_url: str, opam_package: str, max_content_size: Optional[int] = None, initialize_opam_root: bool = False, ): super().__init__(storage=storage, url=url, max_content_size=max_content_size) self.opam_root = opam_root self.opam_instance = opam_instance self.opam_url = opam_url self.opam_package = opam_package self.initialize_opam_root = initialize_opam_root def get_package_dir(self) -> str: return ( f"{self.opam_root}/repo/{self.opam_instance}/packages/{self.opam_package}" ) def get_package_name(self, version: str) -> str: return f"{self.opam_package}.{version}" def get_package_file(self, version: str) -> str: return f"{self.get_package_dir()}/{self.get_package_name(version)}/opam" def get_metadata_authority(self): return MetadataAuthority(type=MetadataAuthorityType.FORGE, url=self.opam_url) @cached_method def _compute_versions(self) -> List[str]: """Compute the versions using opam internals Raises: ValueError in case the lister is not able to determine the list of versions Returns: The list of versions for the package """ # TODO: use `opam show` instead of this workaround when it support the `--repo` # flag package_dir = self.get_package_dir() + if not os.path.exists(package_dir): raise ValueError( f"can't get versions for package {self.opam_package} " f"(at url {self.url})." ) + versions = [ ".".join(version.split(".")[1:]) for version in os.listdir(package_dir) ] if not versions: raise ValueError( f"can't get versions for package {self.opam_package} " f"(at url {self.url})" ) versions.sort() return versions def get_versions(self) -> List[str]: """First initialize the opam root directory if needed then start listing the package versions. Raises: ValueError in case the lister is not able to determine the list of versions or if the opam root directory is invalid. 
""" if self.initialize_opam_root: # for standalone loader (e.g docker), loader must initialize the opam root # folder call( [ "opam", "init", "--reinit", "--bare", "--no-setup", "--root", self.opam_root, self.opam_instance, self.opam_url, ] ) else: # for standard/production loaders, no need to initialize the opam root # folder. It must be present though so check for it, if not present, raise if not os.path.isfile(os.path.join(self.opam_root, "config")): # so if not correctly setup, raise immediately raise ValueError("Invalid opam root") return self._compute_versions() def get_default_version(self) -> str: """Return the most recent version of the package as default.""" return self._compute_versions()[-1] def _opam_show_args(self, version: str): package_file = self.get_package_file(version) return [ "opam", "show", "--color", "never", "--safe", "--normalise", "--root", self.opam_root, "--file", package_file, ] def get_enclosed_single_line_field(self, field, version) -> Optional[str]: result = opam_read(self._opam_show_args(version) + ["--field", field]) # Sanitize the result if any (remove trailing \n and enclosing ") return result.strip().strip('"') if result else None def get_package_info(self, version: str) -> Iterator[Tuple[str, OpamPackageInfo]]: url = self.get_enclosed_single_line_field("url.src:", version) if url is None: raise ValueError( f"can't get field url.src: for version {version} of package {self.opam_package} \ (at url {self.url}) from `opam show`" ) authors_field = self.get_enclosed_single_line_field("authors:", version) fullname = b"" if authors_field is None else str.encode(authors_field) author = Person.from_fullname(fullname) maintainer_field = self.get_enclosed_single_line_field("maintainer:", version) fullname = b"" if maintainer_field is None else str.encode(maintainer_field) committer = Person.from_fullname(fullname) with Popen(self._opam_show_args(version) + ["--raw"], stdout=PIPE) as proc: assert proc.stdout is not None metadata = 
proc.stdout.read() yield self.get_package_name(version), OpamPackageInfo( url=url, filename=None, author=author, committer=committer, version=version, directory_extrinsic_metadata=[ RawExtrinsicMetadataCore( metadata=metadata, format="opam-package-definition", ) ], ) def build_release( self, p_info: OpamPackageInfo, uncompressed_path: str, directory: Sha1Git, ) -> Optional[Release]: msg = ( f"Synthetic release for OPAM source package {self.opam_package} " f"version {p_info.version}\n" ) return Release( name=p_info.version.encode(), author=p_info.author, message=msg.encode(), date=None, target=directory, target_type=ObjectType.DIRECTORY, synthetic=True, ) diff --git a/swh/loader/package/opam/tests/data/fake_opam_repo/repo b/swh/loader/package/opam/tests/data/fake_opam_repo/_repo similarity index 100% rename from swh/loader/package/opam/tests/data/fake_opam_repo/repo rename to swh/loader/package/opam/tests/data/fake_opam_repo/_repo diff --git a/swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/lock b/swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/lock new file mode 100644 index 0000000..e69de29 diff --git a/swh/loader/package/opam/tests/data/fake_opam_repo/packages/agrid/agrid.0.1/opam b/swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/agrid/agrid.0.1/opam similarity index 100% rename from swh/loader/package/opam/tests/data/fake_opam_repo/packages/agrid/agrid.0.1/opam rename to swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/agrid/agrid.0.1/opam diff --git a/swh/loader/package/opam/tests/data/fake_opam_repo/packages/directories/directories.0.1/opam b/swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/directories/directories.0.1/opam similarity index 100% rename from swh/loader/package/opam/tests/data/fake_opam_repo/packages/directories/directories.0.1/opam rename to 
swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/directories/directories.0.1/opam diff --git a/swh/loader/package/opam/tests/data/fake_opam_repo/packages/directories/directories.0.2/opam b/swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/directories/directories.0.2/opam similarity index 100% rename from swh/loader/package/opam/tests/data/fake_opam_repo/packages/directories/directories.0.2/opam rename to swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/directories/directories.0.2/opam diff --git a/swh/loader/package/opam/tests/data/fake_opam_repo/packages/directories/directories.0.3/opam b/swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/directories/directories.0.3/opam similarity index 100% rename from swh/loader/package/opam/tests/data/fake_opam_repo/packages/directories/directories.0.3/opam rename to swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/directories/directories.0.3/opam diff --git a/swh/loader/package/opam/tests/data/fake_opam_repo/packages/ocb/ocb.0.1/opam b/swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/ocb/ocb.0.1/opam similarity index 100% rename from swh/loader/package/opam/tests/data/fake_opam_repo/packages/ocb/ocb.0.1/opam rename to swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/packages/ocb/ocb.0.1/opam diff --git a/swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/repos-config b/swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/repos-config new file mode 100644 index 0000000..5096094 --- /dev/null +++ b/swh/loader/package/opam/tests/data/fake_opam_repo/repo/loadertest/repos-config @@ -0,0 +1 @@ +repositories: "opam.ocaml.org" {"https://opam.ocaml.org"} diff --git a/swh/loader/package/opam/tests/test_opam.py b/swh/loader/package/opam/tests/test_opam.py index ade79ef..b37d971 100644 --- a/swh/loader/package/opam/tests/test_opam.py +++ 
b/swh/loader/package/opam/tests/test_opam.py @@ -1,355 +1,394 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from os.path import exists +import shutil + +import pytest + from swh.loader.package import __version__ from swh.loader.package.loader import RawExtrinsicMetadataCore from swh.loader.package.opam.loader import OpamLoader, OpamPackageInfo from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats from swh.model.hashutil import hash_to_bytes from swh.model.model import ( Person, RawExtrinsicMetadata, Release, Snapshot, SnapshotBranch, TargetType, ) from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher from swh.model.model import ObjectType as ModelObjectType from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType from swh.storage.interface import PagedResult OCB_METADATA = b"""\ opam-version: "2.0" name: "ocb" version: "0.1" synopsis: "SVG badge generator" description: "An OCaml library for SVG badge generation. There\'s also a command-line tool provided." 
maintainer: "OCamlPro " authors: "OCamlPro " license: "ISC" homepage: "https://ocamlpro.github.io/ocb/" doc: "https://ocamlpro.github.io/ocb/api/" bug-reports: "https://github.com/OCamlPro/ocb/issues" depends: [ "ocaml" {>= "4.05"} "dune" {>= "2.0"} "odoc" {with-doc} ] build: [ ["dune" "subst"] {dev} [ "dune" "build" "-p" name "-j" jobs "@install" "@runtest" {with-test} "@doc" {with-doc} ] ] dev-repo: "git+https://github.com/OCamlPro/ocb.git" url { src: "https://github.com/OCamlPro/ocb/archive/0.1.tar.gz" checksum: [ "sha256=aa27684fbda1b8036ae7e3c87de33a98a9cd2662bcc91c8447e00e41476b6a46" "sha512=1260344f184dd8c8074b0439dbcc8a5d59550a654c249cd61913d4c150c664f37b76195ddca38f7f6646d08bddb320ceb8d420508450b4f09a233cd5c22e6b9b" ] } """ # noqa +@pytest.fixture +def fake_opam_root(mocker, tmpdir, datadir): + """Fixture to initialize the actual opam in test context. It mocks the actual opam init + calls and installs a fake opam root out of the one present in datadir. + + """ + # inhibits the real `subprocess.call` which prepares the required internal opam + # state + module_name = "swh.loader.package.opam.loader" + mock_init = mocker.patch(f"{module_name}.call", return_value=None) + + # Installs the fake opam root for the tests to use + fake_opam_root_src = f"{datadir}/fake_opam_repo" + fake_opam_root_dst = f"{tmpdir}/opam" + # old version does not support dirs_exist_ok... 
+ # TypeError: copytree() got an unexpected keyword argument 'dirs_exist_ok' + # see: https://docs.python.org/3.7/library/shutil.html + if exists(fake_opam_root_dst): + shutil.rmtree(fake_opam_root_dst) + shutil.copytree(fake_opam_root_src, fake_opam_root_dst) + + yield fake_opam_root_dst + + # loader are initialized with `initialize_opam_root=True` so this should be called + assert mock_init.called, "This should be called when loader use this fixture" + + def test_opam_loader_no_opam_repository_fails(swh_storage, tmpdir, datadir): """Running opam loader without a prepared opam repository fails""" opam_url = f"file://{datadir}/fake_opam_repo" opam_root = tmpdir opam_instance = "loadertest" opam_package = "agrid" url = f"opam+{opam_url}/packages/{opam_package}" loader = OpamLoader( swh_storage, url, opam_root, opam_instance, opam_url, opam_package, - initialize_opam_root=False, # The opam directory must be present + initialize_opam_root=False, # The opam directory must be present and no init... ) # No opam root directory init directory from loader. So, at the opam root does not # exist, the loading fails. That's the expected use for the production workers # (whose opam_root maintenance will be externally managed). 
actual_load_status = loader.load() assert actual_load_status == {"status": "failed"} -def test_opam_loader_one_version(tmpdir, requests_mock_datadir, datadir, swh_storage): - +def test_opam_loader_one_version( + tmpdir, requests_mock_datadir, fake_opam_root, datadir, swh_storage +): opam_url = f"file://{datadir}/fake_opam_repo" - opam_root = tmpdir + opam_root = fake_opam_root opam_instance = "loadertest" opam_package = "agrid" url = f"opam+{opam_url}/packages/{opam_package}" loader = OpamLoader( swh_storage, url, opam_root, opam_instance, opam_url, opam_package, - initialize_opam_root=True, + initialize_opam_root=True, # go through the initialization while mocking it ) actual_load_status = loader.load() expected_snapshot_id = hash_to_bytes("e1159446b00745ba4daa7ee26d74fbd81ecc081c") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), } assert_last_visit_matches( swh_storage, url, status="full", type="opam", snapshot=expected_snapshot_id ) release_id = hash_to_bytes("d4d8d3df4f34609a3eeabd48aea49002c5f54f41") expected_snapshot = Snapshot( id=expected_snapshot_id, branches={ b"HEAD": SnapshotBranch(target=b"agrid.0.1", target_type=TargetType.ALIAS,), b"agrid.0.1": SnapshotBranch( target=release_id, target_type=TargetType.RELEASE, ), }, ) check_snapshot(expected_snapshot, swh_storage) assert swh_storage.release_get([release_id])[0] == Release( name=b"0.1", message=b"Synthetic release for OPAM source package agrid version 0.1\n", target=hash_to_bytes("00412ee5bc601deb462e55addd1004715116785e"), target_type=ModelObjectType.DIRECTORY, synthetic=True, author=Person.from_fullname(b"OCamlPro "), date=None, id=release_id, ) stats = get_stats(swh_storage) assert { "content": 18, "directory": 8, "origin": 1, "origin_visit": 1, "release": 1, "revision": 0, "skipped_content": 0, "snapshot": 1, } == stats -def test_opam_loader_many_version(tmpdir, requests_mock_datadir, datadir, swh_storage): +def test_opam_loader_many_version( + 
tmpdir, requests_mock_datadir, fake_opam_root, datadir, swh_storage +): opam_url = f"file://{datadir}/fake_opam_repo" - opam_root = tmpdir + opam_root = fake_opam_root opam_instance = "loadertest" opam_package = "directories" url = f"opam+{opam_url}/packages/{opam_package}" loader = OpamLoader( swh_storage, url, opam_root, opam_instance, opam_url, opam_package, initialize_opam_root=True, ) actual_load_status = loader.load() expected_snapshot_id = hash_to_bytes("f498f7f3b0edbce5cf5834b487a4f8360f6a6a43") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), } expected_snapshot = Snapshot( id=expected_snapshot_id, branches={ b"HEAD": SnapshotBranch( target=b"directories.0.3", target_type=TargetType.ALIAS, ), b"directories.0.1": SnapshotBranch( target=hash_to_bytes("1c88d466b3d57a619e296999322d096fa37bb1c2"), target_type=TargetType.RELEASE, ), b"directories.0.2": SnapshotBranch( target=hash_to_bytes("d6f30684039ad485511a138e2ae504ff67a13075"), target_type=TargetType.RELEASE, ), b"directories.0.3": SnapshotBranch( target=hash_to_bytes("6cf92c0ff052074e69ac18809a9c8198bcc2e746"), target_type=TargetType.RELEASE, ), }, ) assert_last_visit_matches( swh_storage, url, status="full", type="opam", snapshot=expected_snapshot_id ) check_snapshot(expected_snapshot, swh_storage) -def test_opam_release(tmpdir, requests_mock_datadir, swh_storage, datadir): +def test_opam_release( + tmpdir, requests_mock_datadir, fake_opam_root, swh_storage, datadir +): opam_url = f"file://{datadir}/fake_opam_repo" - opam_root = tmpdir + opam_root = fake_opam_root opam_instance = "loadertest" opam_package = "ocb" url = f"opam+{opam_url}/packages/{opam_package}" loader = OpamLoader( swh_storage, url, opam_root, opam_instance, opam_url, opam_package, initialize_opam_root=True, ) actual_load_status = loader.load() expected_snapshot_id = hash_to_bytes("8ba39f050243a72ca667c5587a87413240cbaa47") assert actual_load_status == { "status": "eventful", "snapshot_id": 
expected_snapshot_id.hex(), } info_iter = loader.get_package_info("0.1") branch_name, package_info = next(info_iter) expected_branch_name = "ocb.0.1" expected_package_info = OpamPackageInfo( url="https://github.com/OCamlPro/ocb/archive/0.1.tar.gz", filename=None, author=Person.from_fullname(b"OCamlPro "), committer=Person.from_fullname(b"OCamlPro "), version="0.1", directory_extrinsic_metadata=[ RawExtrinsicMetadataCore( metadata=OCB_METADATA, format="opam-package-definition", ) ], ) assert branch_name == expected_branch_name assert package_info == expected_package_info release_id = hash_to_bytes("c231e541eb29c712635ada394b04127ac69e9fb0") expected_snapshot = Snapshot( id=hash_to_bytes(actual_load_status["snapshot_id"]), branches={ b"HEAD": SnapshotBranch(target=b"ocb.0.1", target_type=TargetType.ALIAS,), b"ocb.0.1": SnapshotBranch( target=release_id, target_type=TargetType.RELEASE, ), }, ) assert_last_visit_matches( swh_storage, url, status="full", type="opam", snapshot=expected_snapshot.id ) check_snapshot(expected_snapshot, swh_storage) release = swh_storage.release_get([release_id])[0] assert release is not None assert release.author == expected_package_info.author -def test_opam_metadata(tmpdir, requests_mock_datadir, swh_storage, datadir): +def test_opam_metadata( + tmpdir, requests_mock_datadir, fake_opam_root, swh_storage, datadir +): opam_url = f"file://{datadir}/fake_opam_repo" - opam_root = tmpdir + opam_root = fake_opam_root opam_instance = "loadertest" opam_package = "ocb" url = f"opam+{opam_url}/packages/{opam_package}" loader = OpamLoader( swh_storage, url, opam_root, opam_instance, opam_url, opam_package, initialize_opam_root=True, ) actual_load_status = loader.load() assert actual_load_status["status"] == "eventful" expected_release_id = hash_to_bytes("c231e541eb29c712635ada394b04127ac69e9fb0") expected_snapshot = Snapshot( id=hash_to_bytes(actual_load_status["snapshot_id"]), branches={ b"HEAD": SnapshotBranch(target=b"ocb.0.1", 
target_type=TargetType.ALIAS,), b"ocb.0.1": SnapshotBranch( target=expected_release_id, target_type=TargetType.RELEASE, ), }, ) assert_last_visit_matches( swh_storage, url, status="full", type="opam", snapshot=expected_snapshot.id ) check_snapshot(expected_snapshot, swh_storage) release = swh_storage.release_get([expected_release_id])[0] assert release is not None release_swhid = CoreSWHID( object_type=ObjectType.RELEASE, object_id=expected_release_id ) directory_swhid = ExtendedSWHID( object_type=ExtendedObjectType.DIRECTORY, object_id=release.target ) metadata_authority = MetadataAuthority( type=MetadataAuthorityType.FORGE, url=opam_url, ) expected_metadata = [ RawExtrinsicMetadata( target=directory_swhid, authority=metadata_authority, fetcher=MetadataFetcher( name="swh.loader.package.opam.loader.OpamLoader", version=__version__, ), discovery_date=loader.visit_date, format="opam-package-definition", metadata=OCB_METADATA, origin=url, release=release_swhid, ) ] assert swh_storage.raw_extrinsic_metadata_get( directory_swhid, metadata_authority, ) == PagedResult(next_page_token=None, results=expected_metadata,)