diff --git a/PKG-INFO b/PKG-INFO index 79689b9..351c8fb 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,31 +1,31 @@ Metadata-Version: 2.1 Name: swh.vault -Version: 0.5.0 +Version: 0.5.1 Summary: Software Heritage vault Home-page: https://forge.softwareheritage.org/diffusion/DVAU/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-vault Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-vault/ Description: swh-vault ========= User-facing service that allows to retrieve parts of the archive as self-contained bundles. See the [documentation](https://docs.softwareheritage.org/devel/swh-vault/index.html) for more details. Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/debian/changelog b/debian/changelog index 44cef1f..be69f24 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,382 +1,384 @@ -swh-vault (0.5.0-1~swh1~bpo10+1) buster-swh; urgency=medium +swh-vault (0.5.1-1~swh1) unstable-swh; urgency=medium - * Rebuild for buster-swh + * New upstream release 0.5.1 - (tagged by Antoine Lambert + on 2021-04-29 14:42:43 +0200) + * Upstream changes: - version 0.5.1 - -- Software Heritage autobuilder (on jenkins-debian1) Tue, 08 Dec 2020 15:02:34 +0000 + -- Software Heritage autobuilder (on jenkins-debian1) Thu, 29 Apr 2021 12:48:13 +0000 swh-vault (0.5.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-12-08 15:58:26 +0100) * Upstream changes: - v0.5.0 - vault: Remove deprecated services default config - cli: Remove deprecated logging configuration -- Software Heritage autobuilder (on jenkins-debian1) Tue, 08 Dec 2020 15:01:11 +0000 swh-vault (0.4.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.4.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-23 13:50:22 +0100) * Upstream changes: - v0.4.0 - requirements-test.txt: Drop no longer needed test dependency - swh.vault.tests.conftest: Drop dead code -- Software Heritage autobuilder (on jenkins-debian1) Mon, 23 Nov 2020 12:52:25 +0000 swh-vault (0.3.4-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.4 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-23 11:35:47 +0100) * Upstream changes: - v0.3.4 - test_server: Fix exception structure - conftest: Explicitely declare aiohttp pytest plugin use -- Software Heritage autobuilder (on jenkins-debian1) Mon, 23 Nov 2020 10:37:50 +0000 swh-vault (0.3.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.3 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-18 18:02:35 +0100) * Upstream changes: - v0.3.3 - Fix api.server configuration adaptation issue -- Software Heritage autobuilder (on jenkins-debian1) Wed, 18 Nov 2020 18:40:45 +0000 swh-vault (0.3.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-17 17:46:37 +0100) * Upstream changes: - v0.3.1 - test_server: Simplify test server initialization to the minimum -- Software Heritage autobuilder (on jenkins-debian1) Tue, 17 Nov 2020 16:54:22 +0000 swh-vault (0.3.0-1~swh2) unstable-swh; urgency=medium * Fix dependency release -- Antoine R. Dumont (@ardumont) Tue, 17 Nov 2020 16:54:03 +0000 swh-vault (0.3.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-11-13 12:10:09 +0100) * Upstream changes: - v0.3.0 - Refactor vault configuration without the args indirection - vault.server: Introduce typed VaultInterface - Replace file modes literals to DentryPerms enum - Add tests on current configuration check for cooker instantiation - api.server: Add types and tests on configuration checks - swh.vault: Unify get_vault factory function with other factories - vault.tests: Make postgresql fixture faster -- Software Heritage autobuilder (on jenkins-debian1) Tue, 17 Nov 2020 16:22:52 +0000 swh-vault (0.2.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-10-19 09:52:04 +0200) * Upstream changes: - v0.2.0 - vault.config: Adapt scheduler configuration structure - test_cookers: Turn git_loader into a pytest fixture - tests: Fix loader git instantiation - tox.ini: pin black to the pre-commit version (19.10b0) to avoid flip- flops - Run isort after the CLI import changes -- Software Heritage autobuilder (on jenkins-debian1) Mon, 19 Oct 2020 07:54:03 +0000 swh-vault (0.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.0 - (tagged by David Douard on 2020-09-25 12:34:43 +0200) * Upstream changes: - v0.1.0 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 25 Sep 2020 10:37:22 +0000 swh-vault (0.0.35-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.35 - (tagged by David Douard on 2020-09-11 15:15:26 +0200) * Upstream changes: - v0.0.35 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 11 Sep 2020 13:18:50 +0000 swh-vault (0.0.34-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.34 - (tagged by Antoine Lambert on 2020-08-18 13:55:51 +0200) * Upstream changes: - version 0.0.34 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 18 Aug 2020 11:58:22 +0000 swh-vault (0.0.33-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.33 - (tagged by Valentin Lorentz on 
2020-05-05 17:56:33 +0200) * Upstream changes: - v0.0.33 - * Use swh-storage validation proxy. - * Use model objects to send to storage - * Add a pyproject.toml file to target py37 for black - * setup: Update the minimum required runtime python3 version - * setup.py: add documentation link - * Raise NotFoundExc within our RPC framework instead of returning 404. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 05 May 2020 15:59:51 +0000 swh-vault (0.0.32-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.32 - (tagged by Antoine Lambert on 2020-02-05 13:00:19 +0100) * Upstream changes: - version 0.0.32 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 05 Feb 2020 12:16:16 +0000 swh-vault (0.0.31-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.31 - (tagged by Stefano Zacchiroli on 2019-11-05 17:24:43 +0100) * Upstream changes: - v0.0.31 - * typing: minimal changes to make a no-op mypy run pass - * Remove indirection swh.vault.api.wsgi - * tox.ini: Fix py3 environment to use packaged tests - * CLI: drop obsolete alias "serve" for "rpc- serve" -- Software Heritage autobuilder (on jenkins-debian1) Tue, 05 Nov 2019 16:44:29 +0000 swh-vault (0.0.30-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.30 - (tagged by Antoine Lambert on 2019-07-29 11:17:23 +0200) * Upstream changes: - version 0.0.30 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 29 Jul 2019 09:22:02 +0000 swh-vault (0.0.29-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.29 - (tagged by Antoine Lambert on 2019-05-23 11:39:12 +0200) * Upstream changes: - version 0.0.29 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 May 2019 09:46:57 +0000 swh-vault (0.0.28-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.28 - (tagged by Antoine Lambert on 2019-05-23 11:00:51 +0200) * Upstream changes: - version 0.0.28 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 May 2019 09:05:34 +0000 swh-vault 
(0.0.27-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.27 - (tagged by Antoine Lambert on 2019-05-07 14:44:26 +0200) * Upstream changes: - version 0.0.27 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 07 May 2019 12:54:35 +0000 swh-vault (0.0.26-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.26 - (tagged by Antoine Lambert on 2019-04-26 11:59:23 +0200) * Upstream changes: - version 0.0.26 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 26 Apr 2019 10:06:45 +0000 swh-vault (0.0.25-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.25 - (tagged by Antoine R. Dumont (@ardumont) on 2019-03-29 12:19:19 +0100) * Upstream changes: - v0.0.25 - master vault.backend: Migrate email address to bot@swh.org - API: use default's APIError exception instead of the VaultAPIError - Remove debian packaging from master branch -- Software Heritage autobuilder (on jenkins-debian1) Fri, 29 Mar 2019 11:28:28 +0000 swh-vault (0.0.24-1~swh3) unstable-swh; urgency=low * d/control: Update missing build dependency on postgresql-contrib -- Antoine Romain Dumont Mon, 18 Feb 2019 16:20:50 +0100 swh-vault (0.0.24-1~swh2) unstable-swh; urgency=low * d/control: Update missing build dependency on git * d/rules: Sanitize build locale -- Antoine Romain Dumont Mon, 18 Feb 2019 16:04:50 +0100 swh-vault (0.0.24-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.24 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-18 15:21:31 +0100) * Upstream changes: - v0.0.24 - MANIFEST.in: Fix packaging to include the sql schema definitions -- Software Heritage autobuilder (on jenkins-debian1) Mon, 18 Feb 2019 14:25:33 +0000 swh-vault (0.0.23-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.23 - (tagged by Antoine R. 
Dumont (@ardumont) on 2019-02-18 14:39:25 +0100) * Upstream changes: - v0.0.23 - test_cookers: Fix commit behavior when committing to another branch - Rewrite tests using pytest's fixtures and adapt them to recent refactorings - Normalize the configuration of VaultBackend and cooker - Make it possible to specify the config file via SWH_CONFIG_FILENAME env var - Refactor the VaultBackend to use BaseDb and pool-based db access - Add a swh.vault.api.wsgi module to instanciate the (singleton) wsgi app object -- Software Heritage autobuilder (on jenkins-debian1) Mon, 18 Feb 2019 13:48:28 +0000 swh-vault (0.0.22-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.22 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-14 10:12:41 +0100) * Upstream changes: - v0.0.22 - api/server: Do not read configuration at each request -- Software Heritage autobuilder (on jenkins-debian1) Thu, 14 Feb 2019 09:16:23 +0000 swh-vault (0.0.21-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.21 - (tagged by David Douard on 2019-02-07 17:38:49 +0100) * Upstream changes: - v0.0.21 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 07 Feb 2019 16:44:51 +0000 swh-vault (0.0.20-1~swh1) unstable-swh; urgency=medium * v0.0.20 * swh.vault: Open a get_vault instantiation function * swh.vault.api.client: Permit to specify the query timeout option * swh.storage doesn't expose a db attribute any longer -- Antoine R. 
Dumont (@ardumont) Thu, 24 May 2018 12:31:50 +0200 swh-vault (0.0.19-1~swh1) unstable-swh; urgency=medium * version 0.0.19 -- Antoine Pietri Thu, 03 May 2018 17:49:18 +0200 swh-vault (0.0.18-1~swh1) unstable-swh; urgency=medium * version 0.0.18 -- Antoine Pietri Thu, 03 May 2018 17:10:24 +0200 swh-vault (0.0.17-1~swh1) unstable-swh; urgency=medium * version 0.0.17 -- Antoine Pietri Thu, 03 May 2018 13:16:59 +0200 swh-vault (0.0.16-1~swh1) unstable-swh; urgency=medium * version 0.0.16 -- Antoine Pietri Wed, 02 May 2018 13:41:05 +0200 swh-vault (0.0.15-1~swh1) unstable-swh; urgency=medium * version 0.0.15 -- Antoine Pietri Fri, 27 Apr 2018 18:46:06 +0200 swh-vault (0.0.14-1~swh1) unstable-swh; urgency=medium * version 0.0.14 -- Antoine Pietri Fri, 27 Apr 2018 17:11:50 +0200 swh-vault (0.0.13-1~swh1) unstable-swh; urgency=medium * version 0.0.13 -- Antoine Pietri Wed, 25 Apr 2018 15:52:33 +0200 swh-vault (0.0.12-1~swh1) unstable-swh; urgency=medium * version 0.0.12 -- Antoine Pietri Wed, 21 Feb 2018 15:30:25 +0100 swh-vault (0.0.11-1~swh1) unstable-swh; urgency=medium * version 0.0.11 -- Antoine Pietri Fri, 16 Feb 2018 16:09:10 +0100 swh-vault (0.0.10-1~swh1) unstable-swh; urgency=medium * version 0.0.10 -- Antoine Pietri Thu, 15 Feb 2018 16:08:05 +0100 swh-vault (0.0.9-1~swh1) unstable-swh; urgency=medium * version 0.0.9 -- Antoine Pietri Thu, 01 Feb 2018 18:21:29 +0100 swh-vault (0.0.8-1~swh1) unstable-swh; urgency=medium * version 0.0.8 -- Antoine Pietri Wed, 31 Jan 2018 17:54:55 +0100 swh-vault (0.0.7-1~swh1) unstable-swh; urgency=medium * version 0.0.7 -- Antoine Pietri Tue, 30 Jan 2018 18:21:07 +0100 swh-vault (0.0.6-1~swh1) unstable-swh; urgency=medium * version 0.0.6 -- Antoine Pietri Tue, 09 Jan 2018 16:37:41 +0100 swh-vault (0.0.5-1~swh1) unstable-swh; urgency=medium * version 0.0.5 -- Antoine Pietri Thu, 14 Dec 2017 19:33:01 +0100 swh-vault (0.0.4-1~swh1) unstable-swh; urgency=medium * version 0.0.4 -- Antoine Pietri Fri, 08 Dec 2017 15:33:54 +0100 
swh-vault (0.0.3-1~swh1) unstable-swh; urgency=medium * version 0.0.3 -- Antoine Pietri Fri, 01 Dec 2017 15:31:34 +0100 swh-vault (0.0.2-1~swh1) unstable-swh; urgency=medium * version 0.0.2 -- Antoine Pietri Thu, 30 Nov 2017 15:50:43 +0100 swh-vault (0.0.1-1~swh1) unstable-swh; urgency=medium * Initial release * version 0.0.1 -- Antoine Pietri Mon, 13 Nov 2017 16:22:47 +0100 diff --git a/docs/api.rst b/docs/api.rst index 3983b19..f717513 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,179 +1,179 @@ .. _vault-api-ref: Vault API Reference =================== Software source code **objects**---e.g., individual files, directories, commits, tagged releases, etc.---are stored in the Software Heritage (SWH) Archive in fully deduplicated form. That allows direct access to individual artifacts, but requires some preparation ("cooking") when fast access to a large set of related objects (e.g., an entire repository) is required. The **Software Heritage Vault** takes care of that preparation by asynchronously assembling **bundles** of related source code objects, caching, and garbage collecting them as needed. -The Vault is accessible via a RESTful API documented below. +The Vault is accessible via an RPC API documented below. All endpoints are mounted at API root, which is currently at https://archive.softwareheritage.org/api/1/. Unless otherwise stated, API endpoints respond to HTTP GET method. Object identification --------------------- The vault stores bundles corresponding to different kinds of objects (see :ref:`data-model`). The following object kinds are currently supported by the Vault: - directories - revisions - snapshots The URL fragment ``:objectkind/:objectid`` is used throughout the vault API to identify vault objects. The syntax and meaning of ``:objectid`` for the different object kinds is detailed below. In the case of revisions, a third parameter, ``:format``, must be used to specify the format of the resulting bundle. 
The URL fragment then becomes ``:objectkind/:objectid/:format``. Directories ~~~~~~~~~~~ - object kind: ``directory`` - URL fragment: ``directory/:dir_id`` where ``:dir_id`` is a :py:func:`directory identifier <swh.model.identifiers.directory_identifier>`. The only format available for a directory export is a gzip-compressed tarball. You can extract the resulting bundle using: .. code:: shell tar xaf bundle.tar.gz Revisions ~~~~~~~~~ - object kind: ``revision`` - URL fragment: ``revision/:rev_id/:format`` where ``:rev_id`` is a :py:func:`revision identifier <swh.model.identifiers.revision_identifier>` and ``:format`` is the export format. The only format available for a revision export is ``gitfast``: a gzip-compressed `git fast-export <https://git-scm.com/docs/git-fast-export>`_. You can extract the resulting bundle using: .. code:: shell git init zcat bundle.gitfast.gz | git fast-import git checkout HEAD Repository snapshots ~~~~~~~~~~~~~~~~~~~~ .. TODO **(NOT AVAILABLE YET)** - object kind: ``snapshot`` - URL fragment: ``snapshot/:snp_id`` where ``:snp_id`` is a :py:func:`snapshot identifier <swh.model.identifiers.snapshot_identifier>`. Cooking and status checking --------------------------- Vault bundles might be ready for retrieval or not. When they are not, they will need to be **cooked** before they can be retrieved. A cooked bundle will remain around until it expires; after expiration, it will need to be cooked again before it can be retrieved. Cooking is idempotent, and a no-op in between a previous cooking operation and expiration. .. http:post:: /vault/:objectkind/:objectid[/:format] .. http:get:: /vault/:objectkind/:objectid[/:format] **Request body**: optionally, an ``email`` POST parameter containing an e-mail to notify when the bundle cooking has ended. 
**Allowed HTTP Methods:** - :http:method:`post` to **request** a bundle cooking - :http:method:`get` to check the progress and status of the cooking - :http:method:`head` - :http:method:`options` **Response:** :statuscode 200: bundle available for cooking, status of the cooking :statuscode 400: malformed identifier hash or format :statuscode 404: unavailable bundle or object not found .. sourcecode:: http HTTP/1.1 200 OK Content-Type: application/json { "id": 42, "fetch_url": "/api/1/vault/directory/:dir_id/raw/", "obj_id": ":dir_id", "obj_type": "directory", "progress_message": "Creating tarball...", "status": "pending" } After a cooking request has been started, all subsequent GET and POST requests to the cooking URL return some JSON data containing information about the progress of the bundle creation. The JSON contains the following keys: - ``id``: the ID of the cooking request - ``fetch_url``: the URL that can be used for the retrieval of the bundle - ``obj_type``: an internal identifier uniquely representing the object kind and the format of the required bundle. - ``obj_id``: the identifier of the requested bundle - ``progress_message``: a string describing the current progress of the cooking. If the cooking failed, ``progress_message`` will contain the reason of the failure. - ``status``: one of the following values: - ``new``: the bundle request was created - ``pending``: the bundle is being cooked - ``done``: the bundle has been cooked and is ready for retrieval - ``failed``: the bundle cooking failed and can be retried Retrieval --------- Retrieve a specific bundle from the vault with: .. http:get:: /vault/:objectkind/:objectid[/:format]/raw Where ``:format`` is optional, depending on the object kind. **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options` **Response**: :statuscode 200: bundle available; response body is the bundle. :statuscode 404: unavailable bundle; client should request its cooking. 
diff --git a/docs/cli.rst b/docs/cli.rst new file mode 100644 index 0000000..8a543a9 --- /dev/null +++ b/docs/cli.rst @@ -0,0 +1,8 @@ +.. _swh-vault-cli: + +Command-line interface +====================== + +.. click:: swh.vault.cli:vault + :prog: swh vault + :nested: full diff --git a/docs/index.rst b/docs/index.rst index 62cbf1a..74ab82d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,25 +1,26 @@ .. _swh-vault: Software Heritage - Vault ========================= User-facing service that allows to retrieve parts of the archive as self-contained bundles (e.g., individual releases, entire repository snapshots, etc.) .. toctree:: :maxdepth: 2 :caption: Contents: getting-started.rst api.rst Reference Documentation ----------------------- .. toctree:: :maxdepth: 2 + cli /apidoc/swh.vault diff --git a/requirements-test.txt b/requirements-test.txt index c32d259..6e6306f 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,6 +1,6 @@ pytest dulwich >= 0.18.7 swh.loader.core -swh.loader.git >= 0.0.52 +swh.loader.git >= 0.8 swh.storage[testing] pytest-mock diff --git a/swh.vault.egg-info/PKG-INFO b/swh.vault.egg-info/PKG-INFO index 79689b9..351c8fb 100644 --- a/swh.vault.egg-info/PKG-INFO +++ b/swh.vault.egg-info/PKG-INFO @@ -1,31 +1,31 @@ Metadata-Version: 2.1 Name: swh.vault -Version: 0.5.0 +Version: 0.5.1 Summary: Software Heritage vault Home-page: https://forge.softwareheritage.org/diffusion/DVAU/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-vault Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-vault/ Description: swh-vault ========= User-facing service that allows to retrieve parts of the archive as self-contained bundles. 
See the [documentation](https://docs.softwareheritage.org/devel/swh-vault/index.html) for more details. Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/swh.vault.egg-info/SOURCES.txt b/swh.vault.egg-info/SOURCES.txt index cda1c23..e24b29f 100644 --- a/swh.vault.egg-info/SOURCES.txt +++ b/swh.vault.egg-info/SOURCES.txt @@ -1,67 +1,68 @@ .gitignore .pre-commit-config.yaml AUTHORS CODE_OF_CONDUCT.md CONTRIBUTORS LICENSE MANIFEST.in Makefile README.md conftest.py mypy.ini pyproject.toml pytest.ini requirements-swh.txt requirements-test.txt requirements.txt setup.cfg setup.py tox.ini docs/.gitignore docs/Makefile docs/api.rst +docs/cli.rst docs/conf.py docs/getting-started.rst docs/index.rst docs/_static/.placeholder docs/_templates/.placeholder sql/upgrades/002.sql sql/upgrades/003.sql swh/__init__.py swh.vault.egg-info/PKG-INFO swh.vault.egg-info/SOURCES.txt swh.vault.egg-info/dependency_links.txt swh.vault.egg-info/entry_points.txt swh.vault.egg-info/not-zip-safe swh.vault.egg-info/requires.txt swh.vault.egg-info/top_level.txt swh/vault/__init__.py swh/vault/backend.py swh/vault/cache.py swh/vault/cli.py swh/vault/cooking_tasks.py swh/vault/exc.py swh/vault/interface.py swh/vault/py.typed swh/vault/to_disk.py swh/vault/api/__init__.py swh/vault/api/client.py swh/vault/api/server.py swh/vault/cookers/__init__.py swh/vault/cookers/base.py swh/vault/cookers/directory.py swh/vault/cookers/revision_flat.py swh/vault/cookers/revision_gitfast.py swh/vault/cookers/utils.py swh/vault/sql/30-schema.sql swh/vault/tests/__init__.py swh/vault/tests/conftest.py swh/vault/tests/test_backend.py swh/vault/tests/test_cache.py 
swh/vault/tests/test_cookers.py swh/vault/tests/test_cookers_base.py swh/vault/tests/test_init.py swh/vault/tests/test_init_cookers.py swh/vault/tests/test_server.py swh/vault/tests/test_to_disk.py swh/vault/tests/vault_testing.py \ No newline at end of file diff --git a/swh.vault.egg-info/requires.txt b/swh.vault.egg-info/requires.txt index f4f1db4..64ca232 100644 --- a/swh.vault.egg-info/requires.txt +++ b/swh.vault.egg-info/requires.txt @@ -1,19 +1,19 @@ click flask psycopg2 python-dateutil fastimport typing-extensions swh.core[db,http]>=0.5 swh.model>=0.3 swh.objstorage>=0.0.17 swh.scheduler>=0.7.0 swh.storage>=0.0.106 [testing] pytest dulwich>=0.18.7 swh.loader.core -swh.loader.git>=0.0.52 +swh.loader.git>=0.8 swh.storage[testing] pytest-mock diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py index 714bfda..46d23c5 100644 --- a/swh/vault/tests/test_cookers.py +++ b/swh/vault/tests/test_cookers.py @@ -1,575 +1,565 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import contextlib import datetime import gzip import io import os import pathlib import subprocess import tarfile import tempfile import unittest import unittest.mock import dulwich.fastexport import dulwich.index import dulwich.objects import dulwich.porcelain import dulwich.repo import pytest from swh.loader.git.from_disk import GitLoaderFromDisk from swh.model import from_disk, hashutil from swh.model.model import Directory, DirectoryEntry, Person, Revision, RevisionType from swh.vault.cookers import DirectoryCooker, RevisionGitfastCooker from swh.vault.tests.vault_testing import hash_content from swh.vault.to_disk import HIDDEN_MESSAGE, SKIPPED_MESSAGE class TestRepo: """A tiny context manager for a test git repository, with some utility functions to perform basic git 
stuff. """ def __enter__(self): self.tmp_dir = tempfile.TemporaryDirectory(prefix="tmp-vault-repo-") self.repo_dir = self.tmp_dir.__enter__() self.repo = dulwich.repo.Repo.init(self.repo_dir) self.author_name = b"Test Author" self.author_email = b"test@softwareheritage.org" self.author = b"%s <%s>" % (self.author_name, self.author_email) self.base_date = 258244200 self.counter = 0 return pathlib.Path(self.repo_dir) def __exit__(self, exc, value, tb): self.tmp_dir.__exit__(exc, value, tb) def checkout(self, rev_sha): rev = self.repo[rev_sha] dulwich.index.build_index_from_tree( self.repo_dir, self.repo.index_path(), self.repo.object_store, rev.tree ) def git_shell(self, *cmd, stdout=subprocess.DEVNULL, **kwargs): name = self.author_name email = self.author_email date = "%d +0000" % (self.base_date + self.counter) env = { # Set git commit format "GIT_AUTHOR_NAME": name, "GIT_AUTHOR_EMAIL": email, "GIT_AUTHOR_DATE": date, "GIT_COMMITTER_NAME": name, "GIT_COMMITTER_EMAIL": email, "GIT_COMMITTER_DATE": date, # Ignore all the system-wide and user configurations "GIT_CONFIG_NOSYSTEM": "1", "HOME": str(self.tmp_dir), "XDG_CONFIG_HOME": str(self.tmp_dir), } kwargs.setdefault("env", {}).update(env) subprocess.check_call( ("git", "-C", self.repo_dir) + cmd, stdout=stdout, **kwargs ) def commit(self, message="Commit test\n", ref=b"HEAD"): """Commit the current working tree in a new commit with message on the branch 'ref'. At the end of the commit, the reference should stay the same and the index should be clean. 
""" self.git_shell("add", ".") message = message.encode() + b"\n" ret = self.repo.do_commit( message=message, committer=self.author, commit_timestamp=self.base_date + self.counter, commit_timezone=0, ref=ref, ) self.counter += 1 # committing on another branch leaves # dangling files in index if ref != b"HEAD": # XXX this should work (but does not) # dulwich.porcelain.reset(self.repo, 'hard') self.git_shell("reset", "--hard", "HEAD") return ret def merge(self, parent_sha_list, message="Merge branches."): self.git_shell( "merge", "--allow-unrelated-histories", "-m", message, *[p.decode() for p in parent_sha_list], ) self.counter += 1 return self.repo.refs[b"HEAD"] def print_debug_graph(self, reflog=False): args = ["log", "--all", "--graph", "--decorate"] if reflog: args.append("--reflog") self.git_shell(*args, stdout=None) @pytest.fixture -def swh_loader_config(swh_loader_config): - swh_loader_config["max_content_size"] = 100 * 1024 * 1024 - return swh_loader_config - - -@pytest.fixture -def git_loader( - swh_storage, swh_loader_config, -): +def git_loader(swh_storage,): """Instantiate a Git Loader using the storage instance as storage. 
""" def _create_loader(directory): - loader = GitLoaderFromDisk( + return GitLoaderFromDisk( + swh_storage, "fake_origin", directory=directory, visit_date=datetime.datetime.now(datetime.timezone.utc), - config=swh_loader_config, ) - loader.storage = swh_storage - return loader return _create_loader @contextlib.contextmanager def cook_extract_directory(storage, obj_id): """Context manager that cooks a directory and extract it.""" backend = unittest.mock.MagicMock() backend.storage = storage cooker = DirectoryCooker("directory", obj_id, backend=backend, storage=storage) cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) with tempfile.TemporaryDirectory(prefix="tmp-vault-extract-") as td: with tarfile.open(fileobj=cooker.fileobj, mode="r") as tar: tar.extractall(td) yield pathlib.Path(td) / hashutil.hash_to_hex(obj_id) cooker.storage = None @contextlib.contextmanager def cook_stream_revision_gitfast(storage, obj_id): """Context manager that cooks a revision and stream its fastexport.""" backend = unittest.mock.MagicMock() backend.storage = storage cooker = RevisionGitfastCooker( "revision_gitfast", obj_id, backend=backend, storage=storage ) cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) fastexport_stream = gzip.GzipFile(fileobj=cooker.fileobj) yield fastexport_stream cooker.storage = None @contextlib.contextmanager def cook_extract_revision_gitfast(storage, obj_id): """Context manager that cooks a revision and extract it.""" test_repo = TestRepo() with cook_stream_revision_gitfast(storage, obj_id) as stream, test_repo as p: processor = dulwich.fastexport.GitImportProcessor(test_repo.repo) processor.import_stream(stream) yield test_repo, p TEST_CONTENT = ( " test content\n" "and unicode \N{BLACK HEART SUIT}\n" " and trailing spaces " ) TEST_EXECUTABLE = b"\x42\x40\x00\x00\x05" class TestDirectoryCooker: def test_directory_simple(self, git_loader): repo = 
TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) (rp / "executable").write_bytes(TEST_EXECUTABLE) (rp / "executable").chmod(0o755) (rp / "link").symlink_to("file") (rp / "dir1/dir2").mkdir(parents=True) (rp / "dir1/dir2/file").write_text(TEST_CONTENT) c = repo.commit() loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) with cook_extract_directory(loader.storage, obj_id) as p: assert (p / "file").stat().st_mode == 0o100644 assert (p / "file").read_text() == TEST_CONTENT assert (p / "executable").stat().st_mode == 0o100755 assert (p / "executable").read_bytes() == TEST_EXECUTABLE assert (p / "link").is_symlink assert os.readlink(str(p / "link")) == "file" assert (p / "dir1/dir2/file").stat().st_mode == 0o100644 assert (p / "dir1/dir2/file").read_text() == TEST_CONTENT directory = from_disk.Directory.from_disk(path=bytes(p)) assert obj_id_hex == hashutil.hash_to_hex(directory.hash) def test_directory_filtered_objects(self, git_loader): repo = TestRepo() with repo as rp: file_1, id_1 = hash_content(b"test1") file_2, id_2 = hash_content(b"test2") file_3, id_3 = hash_content(b"test3") (rp / "file").write_bytes(file_1) (rp / "hidden_file").write_bytes(file_2) (rp / "absent_file").write_bytes(file_3) c = repo.commit() loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) # FIXME: storage.content_update() should be changed to allow things # like that with loader.storage.get_db().transaction() as cur: cur.execute( """update content set status = 'visible' where sha1 = %s""", (id_1,), ) cur.execute( """update content set status = 'hidden' where sha1 = %s""", (id_2,), ) cur.execute( """update content set status = 'absent' where sha1 = %s""", (id_3,), ) with cook_extract_directory(loader.storage, obj_id) as p: assert (p / "file").read_bytes() == b"test1" assert (p / "hidden_file").read_bytes() == HIDDEN_MESSAGE 
assert (p / "absent_file").read_bytes() == SKIPPED_MESSAGE def test_directory_bogus_perms(self, git_loader): # Some early git repositories have 664/775 permissions... let's check # if all the weird modes are properly normalized in the directory # cooker. repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) (rp / "file").chmod(0o664) (rp / "executable").write_bytes(TEST_EXECUTABLE) (rp / "executable").chmod(0o775) (rp / "wat").write_text(TEST_CONTENT) (rp / "wat").chmod(0o604) c = repo.commit() loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) with cook_extract_directory(loader.storage, obj_id) as p: assert (p / "file").stat().st_mode == 0o100644 assert (p / "executable").stat().st_mode == 0o100755 assert (p / "wat").stat().st_mode == 0o100644 def test_directory_revision_data(self, swh_storage): target_rev = "0e8a3ad980ec179856012b7eecf4327e99cd44cd" dir = Directory( entries=( DirectoryEntry( name=b"submodule", type="rev", target=hashutil.hash_to_bytes(target_rev), perms=0o100644, ), ), ) swh_storage.directory_add([dir]) with cook_extract_directory(swh_storage, dir.id) as p: assert (p / "submodule").is_symlink() assert os.readlink(str(p / "submodule")) == target_rev class TestRevisionGitfastCooker: def test_revision_simple(self, git_loader): # # 1--2--3--4--5--6--7 # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) repo.commit("add file1") (rp / "file2").write_text(TEST_CONTENT) repo.commit("add file2") (rp / "dir1/dir2").mkdir(parents=True) (rp / "dir1/dir2/file").write_text(TEST_CONTENT) repo.commit("add dir1/dir2/file") (rp / "bin1").write_bytes(TEST_EXECUTABLE) (rp / "bin1").chmod(0o755) repo.commit("add bin1") (rp / "link1").symlink_to("file1") repo.commit("link link1 to file1") (rp / "file2").unlink() repo.commit("remove file2") (rp / "bin1").rename(rp / "bin") repo.commit("rename bin1 to bin") loader = git_loader(str(rp)) loader.load() 
obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) with cook_extract_revision_gitfast(loader.storage, obj_id) as (ert, p): ert.checkout(b"HEAD") assert (p / "file1").stat().st_mode == 0o100644 assert (p / "file1").read_text() == TEST_CONTENT assert (p / "link1").is_symlink assert os.readlink(str(p / "link1")) == "file1" assert (p / "bin").stat().st_mode == 0o100755 assert (p / "bin").read_bytes() == TEST_EXECUTABLE assert (p / "dir1/dir2/file").read_text() == TEST_CONTENT assert (p / "dir1/dir2/file").stat().st_mode == 0o100644 assert ert.repo.refs[b"HEAD"].decode() == obj_id_hex def test_revision_two_roots(self, git_loader): # # 1----3---4 # / # 2---- # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) c1 = repo.commit("Add file1") del repo.repo.refs[b"refs/heads/master"] # git update-ref -d HEAD (rp / "file2").write_text(TEST_CONTENT) repo.commit("Add file2") repo.merge([c1]) (rp / "file3").write_text(TEST_CONTENT) repo.commit("add file3") obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) loader = git_loader(str(rp)) loader.load() with cook_extract_revision_gitfast(loader.storage, obj_id) as (ert, p): assert ert.repo.refs[b"HEAD"].decode() == obj_id_hex def test_revision_two_double_fork_merge(self, git_loader): # # 2---4---6 # / / / # 1---3---5 # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) c1 = repo.commit("Add file1") repo.repo.refs[b"refs/heads/c1"] = c1 (rp / "file2").write_text(TEST_CONTENT) repo.commit("Add file2") (rp / "file3").write_text(TEST_CONTENT) c3 = repo.commit("Add file3", ref=b"refs/heads/c1") repo.repo.refs[b"refs/heads/c3"] = c3 repo.merge([c3]) (rp / "file5").write_text(TEST_CONTENT) c5 = repo.commit("Add file3", ref=b"refs/heads/c3") repo.merge([c5]) obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) loader = git_loader(str(rp)) loader.load() with 
cook_extract_revision_gitfast(loader.storage, obj_id) as (ert, p): assert ert.repo.refs[b"HEAD"].decode() == obj_id_hex def test_revision_triple_merge(self, git_loader): # # .---.---5 # / / / # 2 3 4 # / / / # 1---.---. # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) c1 = repo.commit("Commit 1") repo.repo.refs[b"refs/heads/b1"] = c1 repo.repo.refs[b"refs/heads/b2"] = c1 repo.commit("Commit 2") c3 = repo.commit("Commit 3", ref=b"refs/heads/b1") c4 = repo.commit("Commit 4", ref=b"refs/heads/b2") repo.merge([c3, c4]) obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) loader = git_loader(str(rp)) loader.load() with cook_extract_revision_gitfast(loader.storage, obj_id) as (ert, p): assert ert.repo.refs[b"HEAD"].decode() == obj_id_hex def test_revision_filtered_objects(self, git_loader): repo = TestRepo() with repo as rp: file_1, id_1 = hash_content(b"test1") file_2, id_2 = hash_content(b"test2") file_3, id_3 = hash_content(b"test3") (rp / "file").write_bytes(file_1) (rp / "hidden_file").write_bytes(file_2) (rp / "absent_file").write_bytes(file_3) repo.commit() obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) loader = git_loader(str(rp)) loader.load() # FIXME: storage.content_update() should be changed to allow things # like that with loader.storage.get_db().transaction() as cur: cur.execute( """update content set status = 'visible' where sha1 = %s""", (id_1,), ) cur.execute( """update content set status = 'hidden' where sha1 = %s""", (id_2,), ) cur.execute( """update content set status = 'absent' where sha1 = %s""", (id_3,), ) with cook_extract_revision_gitfast(loader.storage, obj_id) as (ert, p): ert.checkout(b"HEAD") assert (p / "file").read_bytes() == b"test1" assert (p / "hidden_file").read_bytes() == HIDDEN_MESSAGE assert (p / "absent_file").read_bytes() == SKIPPED_MESSAGE def test_revision_bogus_perms(self, git_loader): # Some early git repositories have 
664/775 permissions... let's check # if all the weird modes are properly normalized in the revision # cooker. repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) (rp / "file").chmod(0o664) (rp / "executable").write_bytes(TEST_EXECUTABLE) (rp / "executable").chmod(0o775) (rp / "wat").write_text(TEST_CONTENT) (rp / "wat").chmod(0o604) repo.commit("initial commit") loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) with cook_extract_revision_gitfast(loader.storage, obj_id) as (ert, p): ert.checkout(b"HEAD") assert (p / "file").stat().st_mode == 0o100644 assert (p / "executable").stat().st_mode == 0o100755 assert (p / "wat").stat().st_mode == 0o100644 def test_revision_null_fields(self, git_loader): # Our schema doesn't enforce a lot of non-null revision fields. We need # to check these cases don't break the cooker. repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) c = repo.commit("initial commit") loader = git_loader(str(rp)) loader.load() repo.repo.refs[b"HEAD"].decode() dir_id_hex = repo.repo[c].tree.decode() dir_id = hashutil.hash_to_bytes(dir_id_hex) test_revision = Revision( message=b"", author=Person(name=None, email=None, fullname=b""), date=None, committer=Person(name=None, email=None, fullname=b""), committer_date=None, parents=(), type=RevisionType.GIT, directory=dir_id, metadata={}, synthetic=True, ) storage = loader.storage storage.revision_add([test_revision]) with cook_extract_revision_gitfast(storage, test_revision.id) as (ert, p): ert.checkout(b"HEAD") assert (p / "file").stat().st_mode == 0o100644 def test_revision_revision_data(self, swh_storage): target_rev = "0e8a3ad980ec179856012b7eecf4327e99cd44cd" dir = Directory( entries=( DirectoryEntry( name=b"submodule", type="rev", target=hashutil.hash_to_bytes(target_rev), perms=0o100644, ), ), ) swh_storage.directory_add([dir]) rev = Revision( message=b"", 
author=Person(name=None, email=None, fullname=b""), date=None, committer=Person(name=None, email=None, fullname=b""), committer_date=None, parents=(), type=RevisionType.GIT, directory=dir.id, metadata={}, synthetic=True, ) swh_storage.revision_add([rev]) with cook_stream_revision_gitfast(swh_storage, rev.id) as stream: pattern = "M 160000 {} submodule".format(target_rev).encode() assert pattern in stream.read() diff --git a/tox.ini b/tox.ini index f053ae8..9b620ba 100644 --- a/tox.ini +++ b/tox.ini @@ -1,34 +1,72 @@ [tox] envlist=black,flake8,mypy,py3 [testenv] extras = testing deps = pytest-cov commands = pytest --cov={envsitepackagesdir}/swh/vault \ {envsitepackagesdir}/swh/vault \ --cov-branch {posargs} [testenv:black] skip_install = true deps = black==19.10b0 commands = {envpython} -m black --check swh [testenv:flake8] skip_install = true deps = flake8 commands = {envpython} -m flake8 [testenv:mypy] extras = testing deps = mypy commands = mypy swh + +# build documentation outside swh-environment using the current +# git HEAD of swh-docs, is executed on CI for each diff to prevent +# breaking doc build +[testenv:sphinx] +whitelist_externals = make +usedevelop = true +extras = + testing +deps = + # fetch and install swh-docs in develop mode + -e git+https://forge.softwareheritage.org/source/swh-docs#egg=swh.docs + +setenv = + SWH_PACKAGE_DOC_TOX_BUILD = 1 + # turn warnings into errors + SPHINXOPTS = -W +commands = + make -I ../.tox/sphinx/src/swh-docs/swh/ -C docs + + +# build documentation only inside swh-environment using local state +# of swh-docs package +[testenv:sphinx-dev] +whitelist_externals = make +usedevelop = true +extras = + testing +deps = + # install swh-docs in develop mode + -e ../swh-docs + +setenv = + SWH_PACKAGE_DOC_TOX_BUILD = 1 + # turn warnings into errors + SPHINXOPTS = -W +commands = + make -I ../.tox/sphinx-dev/src/swh-docs/swh/ -C docs