diff --git a/PKG-INFO b/PKG-INFO index 2624635..73e2986 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,36 +1,36 @@ Metadata-Version: 2.1 Name: swh.vault -Version: 0.6.3 +Version: 0.6.4 Summary: Software Heritage vault Home-page: https://forge.softwareheritage.org/diffusion/DVAU/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-vault Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-vault/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: graph License-File: LICENSE License-File: AUTHORS swh-vault ========= User-facing service that allows to retrieve parts of the archive as self-contained bundles. See the [documentation](https://docs.softwareheritage.org/devel/swh-vault/index.html) for more details. diff --git a/debian/changelog b/debian/changelog index 6178ae8..c96562c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,442 +1,447 @@ -swh-vault (0.6.3-1~swh1~bpo10+1) buster-swh; urgency=medium +swh-vault (0.6.4-1~swh1) unstable-swh; urgency=medium - * Rebuild for buster-swh + * New upstream release 0.6.4 - (tagged by Antoine R. Dumont + (@ardumont) on 2021-06-29 13:18:27 + +0200) + * Upstream changes: - v0.6.4 - Fix tests when the umask is not + 022 - tests: Fix support of Dulwich < 0.20 - conftest: Use + postgresql keyword for the configuration - -- Software Heritage autobuilder (on jenkins-debian1) Tue, 29 Jun 2021 10:01:07 +0000 + -- Software Heritage autobuilder (on jenkins-debian1) Tue, 29 Jun 2021 11:20:54 +0000 swh-vault (0.6.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.3 - (tagged by Antoine R. Dumont (@ardumont) on 2021-06-29 11:55:43 +0200) * Upstream changes: - v0.6.3 - git_bare: Add support for filtered content with Git >= 2.21 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 29 Jun 2021 09:59:47 +0000 swh-vault (0.6.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.2 - (tagged by Antoine R. Dumont (@ardumont) on 2021-06-29 11:08:44 +0200) * Upstream changes: - v0.6.2 - Make swh.graph dependency optional 2/2 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 29 Jun 2021 09:12:28 +0000 swh-vault (0.6.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.1 - (tagged by Antoine R. Dumont (@ardumont) on 2021-06-29 10:12:19 +0200) * Upstream changes: - v0.6.1 - Make swh.graph dependency optional -- Software Heritage autobuilder (on jenkins-debian1) Tue, 29 Jun 2021 08:15:52 +0000 swh-vault (0.6.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-06-28 12:18:51 +0200) * Upstream changes: - v0.6.0 - git_bare: Add support for skipped/missing/absent/hidden contents - git_bare: Optionally access the objstorage directly - git_bare: Use batched content_get() instead of content_find() - git_bare: Use directory_get_entries instead of directory_ls, it should be faster - git_bare: Refactor the graph descent using explicit stacks instead of the call stack. 
- git_bare: When possible, use swh-graph instead of swh-storage to query revision - history - git_bare: Deduplicate object downloads and writes - Add a naive git bare cooker - cli: Add 'cook' command, to run cookers without Celery - tests: Run all directory tests on the gitfast cooker - tests: Add in_memory_backend.py - tests: Make test_directory_bogus_perms/test_revision_bogus_perms/ actually test the - cookers -- Software Heritage autobuilder (on jenkins-debian1) Mon, 28 Jun 2021 10:25:48 +0000 swh-vault (0.5.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.1 - (tagged by Antoine Lambert on 2021-04-29 14:42:43 +0200) * Upstream changes: - version 0.5.1 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 29 Apr 2021 12:48:13 +0000 swh-vault (0.5.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-12-08 15:58:26 +0100) * Upstream changes: - v0.5.0 - vault: Remove deprecated services default config - cli: Remove deprecated logging configuration -- Software Heritage autobuilder (on jenkins-debian1) Tue, 08 Dec 2020 15:01:11 +0000 swh-vault (0.4.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.4.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-23 13:50:22 +0100) * Upstream changes: - v0.4.0 - requirements-test.txt: Drop no longer needed test dependency - swh.vault.tests.conftest: Drop dead code -- Software Heritage autobuilder (on jenkins-debian1) Mon, 23 Nov 2020 12:52:25 +0000 swh-vault (0.3.4-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.4 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-23 11:35:47 +0100) * Upstream changes: - v0.3.4 - test_server: Fix exception structure - conftest: Explicitely declare aiohttp pytest plugin use -- Software Heritage autobuilder (on jenkins-debian1) Mon, 23 Nov 2020 10:37:50 +0000 swh-vault (0.3.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.3 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-18 18:02:35 +0100) * Upstream changes: - v0.3.3 - Fix api.server configuration adaptation issue -- Software Heritage autobuilder (on jenkins-debian1) Wed, 18 Nov 2020 18:40:45 +0000 swh-vault (0.3.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-17 17:46:37 +0100) * Upstream changes: - v0.3.1 - test_server: Simplify test server initialization to the minimum -- Software Heritage autobuilder (on jenkins-debian1) Tue, 17 Nov 2020 16:54:22 +0000 swh-vault (0.3.0-1~swh2) unstable-swh; urgency=medium * Fix dependency release -- Antoine R. Dumont (@ardumont) Tue, 17 Nov 2020 16:54:03 +0000 swh-vault (0.3.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-13 12:10:09 +0100) * Upstream changes: - v0.3.0 - Refactor vault configuration without the args indirection - vault.server: Introduce typed VaultInterface - Replace file modes literals to DentryPerms enum - Add tests on current configuration check for cooker instantiation - api.server: Add types and tests on configuration checks - swh.vault: Unify get_vault factory function with other factories - vault.tests: Make postgresql fixture faster -- Software Heritage autobuilder (on jenkins-debian1) Tue, 17 Nov 2020 16:22:52 +0000 swh-vault (0.2.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-10-19 09:52:04 +0200) * Upstream changes: - v0.2.0 - vault.config: Adapt scheduler configuration structure - test_cookers: Turn git_loader into a pytest fixture - tests: Fix loader git instantiation - tox.ini: pin black to the pre-commit version (19.10b0) to avoid flip- flops - Run isort after the CLI import changes -- Software Heritage autobuilder (on jenkins-debian1) Mon, 19 Oct 2020 07:54:03 +0000 swh-vault (0.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.0 - (tagged by David Douard on 2020-09-25 12:34:43 +0200) * Upstream changes: - v0.1.0 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 25 Sep 2020 10:37:22 +0000 swh-vault (0.0.35-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.35 - (tagged by David Douard on 2020-09-11 15:15:26 +0200) * Upstream changes: - v0.0.35 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 11 Sep 2020 13:18:50 +0000 swh-vault (0.0.34-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.34 - (tagged by Antoine Lambert on 2020-08-18 13:55:51 +0200) * Upstream changes: - version 0.0.34 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 18 Aug 2020 11:58:22 +0000 swh-vault (0.0.33-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.33 - (tagged by Valentin Lorentz on 2020-05-05 17:56:33 +0200) * Upstream changes: - v0.0.33 - * Use swh-storage validation proxy. - * Use model objects to send to storage - * Add a pyproject.toml file to target py37 for black - * setup: Update the minimum required runtime python3 version - * setup.py: add documentation link - * Raise NotFoundExc within our RPC framework instead of returning 404. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 05 May 2020 15:59:51 +0000 swh-vault (0.0.32-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.32 - (tagged by Antoine Lambert on 2020-02-05 13:00:19 +0100) * Upstream changes: - version 0.0.32 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 05 Feb 2020 12:16:16 +0000 swh-vault (0.0.31-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.31 - (tagged by Stefano Zacchiroli on 2019-11-05 17:24:43 +0100) * Upstream changes: - v0.0.31 - * typing: minimal changes to make a no-op mypy run pass - * Remove indirection swh.vault.api.wsgi - * tox.ini: Fix py3 environment to use packaged tests - * CLI: drop obsolete alias "serve" for "rpc- serve" -- Software Heritage autobuilder (on jenkins-debian1) Tue, 05 Nov 2019 16:44:29 +0000 swh-vault (0.0.30-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.30 - (tagged by Antoine Lambert on 2019-07-29 11:17:23 +0200) * Upstream changes: - version 0.0.30 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 29 Jul 2019 09:22:02 +0000 swh-vault (0.0.29-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.29 - (tagged by Antoine Lambert on 2019-05-23 11:39:12 +0200) * Upstream changes: - version 0.0.29 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 May 2019 09:46:57 +0000 swh-vault (0.0.28-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.28 - (tagged by Antoine Lambert on 2019-05-23 11:00:51 +0200) * Upstream changes: - version 0.0.28 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 May 2019 09:05:34 +0000 swh-vault (0.0.27-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.27 - (tagged by Antoine Lambert on 2019-05-07 14:44:26 +0200) * Upstream changes: - version 0.0.27 -- Software Heritage autobuilder (on jenkins-debian1) 
Tue, 07 May 2019 12:54:35 +0000 swh-vault (0.0.26-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.26 - (tagged by Antoine Lambert on 2019-04-26 11:59:23 +0200) * Upstream changes: - version 0.0.26 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 26 Apr 2019 10:06:45 +0000 swh-vault (0.0.25-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.25 - (tagged by Antoine R. Dumont (@ardumont) on 2019-03-29 12:19:19 +0100) * Upstream changes: - v0.0.25 - master vault.backend: Migrate email address to bot@swh.org - API: use default's APIError exception instead of the VaultAPIError - Remove debian packaging from master branch -- Software Heritage autobuilder (on jenkins-debian1) Fri, 29 Mar 2019 11:28:28 +0000 swh-vault (0.0.24-1~swh3) unstable-swh; urgency=low * d/control: Update missing build dependency on postgresql-contrib -- Antoine Romain Dumont Mon, 18 Feb 2019 16:20:50 +0100 swh-vault (0.0.24-1~swh2) unstable-swh; urgency=low * d/control: Update missing build dependency on git * d/rules: Sanitize build locale -- Antoine Romain Dumont Mon, 18 Feb 2019 16:04:50 +0100 swh-vault (0.0.24-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.24 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-18 15:21:31 +0100) * Upstream changes: - v0.0.24 - MANIFEST.in: Fix packaging to include the sql schema definitions -- Software Heritage autobuilder (on jenkins-debian1) Mon, 18 Feb 2019 14:25:33 +0000 swh-vault (0.0.23-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.23 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-18 14:39:25 +0100) * Upstream changes: - v0.0.23 - test_cookers: Fix commit behavior when committing to another branch - Rewrite tests using pytest's fixtures and adapt them to recent refactorings - Normalize the configuration of VaultBackend and cooker - Make it possible to specify the config file via SWH_CONFIG_FILENAME env var - Refactor the VaultBackend to use BaseDb and pool-based db access - Add a swh.vault.api.wsgi module to instanciate the (singleton) wsgi app object -- Software Heritage autobuilder (on jenkins-debian1) Mon, 18 Feb 2019 13:48:28 +0000 swh-vault (0.0.22-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.22 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-14 10:12:41 +0100) * Upstream changes: - v0.0.22 - api/server: Do not read configuration at each request -- Software Heritage autobuilder (on jenkins-debian1) Thu, 14 Feb 2019 09:16:23 +0000 swh-vault (0.0.21-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.21 - (tagged by David Douard on 2019-02-07 17:38:49 +0100) * Upstream changes: - v0.0.21 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 07 Feb 2019 16:44:51 +0000 swh-vault (0.0.20-1~swh1) unstable-swh; urgency=medium * v0.0.20 * swh.vault: Open a get_vault instantiation function * swh.vault.api.client: Permit to specify the query timeout option * swh.storage doesn't expose a db attribute any longer -- Antoine R. 
Dumont (@ardumont) Thu, 24 May 2018 12:31:50 +0200 swh-vault (0.0.19-1~swh1) unstable-swh; urgency=medium * version 0.0.19 -- Antoine Pietri Thu, 03 May 2018 17:49:18 +0200 swh-vault (0.0.18-1~swh1) unstable-swh; urgency=medium * version 0.0.18 -- Antoine Pietri Thu, 03 May 2018 17:10:24 +0200 swh-vault (0.0.17-1~swh1) unstable-swh; urgency=medium * version 0.0.17 -- Antoine Pietri Thu, 03 May 2018 13:16:59 +0200 swh-vault (0.0.16-1~swh1) unstable-swh; urgency=medium * version 0.0.16 -- Antoine Pietri Wed, 02 May 2018 13:41:05 +0200 swh-vault (0.0.15-1~swh1) unstable-swh; urgency=medium * version 0.0.15 -- Antoine Pietri Fri, 27 Apr 2018 18:46:06 +0200 swh-vault (0.0.14-1~swh1) unstable-swh; urgency=medium * version 0.0.14 -- Antoine Pietri Fri, 27 Apr 2018 17:11:50 +0200 swh-vault (0.0.13-1~swh1) unstable-swh; urgency=medium * version 0.0.13 -- Antoine Pietri Wed, 25 Apr 2018 15:52:33 +0200 swh-vault (0.0.12-1~swh1) unstable-swh; urgency=medium * version 0.0.12 -- Antoine Pietri Wed, 21 Feb 2018 15:30:25 +0100 swh-vault (0.0.11-1~swh1) unstable-swh; urgency=medium * version 0.0.11 -- Antoine Pietri Fri, 16 Feb 2018 16:09:10 +0100 swh-vault (0.0.10-1~swh1) unstable-swh; urgency=medium * version 0.0.10 -- Antoine Pietri Thu, 15 Feb 2018 16:08:05 +0100 swh-vault (0.0.9-1~swh1) unstable-swh; urgency=medium * version 0.0.9 -- Antoine Pietri Thu, 01 Feb 2018 18:21:29 +0100 swh-vault (0.0.8-1~swh1) unstable-swh; urgency=medium * version 0.0.8 -- Antoine Pietri Wed, 31 Jan 2018 17:54:55 +0100 swh-vault (0.0.7-1~swh1) unstable-swh; urgency=medium * version 0.0.7 -- Antoine Pietri Tue, 30 Jan 2018 18:21:07 +0100 swh-vault (0.0.6-1~swh1) unstable-swh; urgency=medium * version 0.0.6 -- Antoine Pietri Tue, 09 Jan 2018 16:37:41 +0100 swh-vault (0.0.5-1~swh1) unstable-swh; urgency=medium * version 0.0.5 -- Antoine Pietri Thu, 14 Dec 2017 19:33:01 +0100 swh-vault (0.0.4-1~swh1) unstable-swh; urgency=medium * version 0.0.4 -- Antoine Pietri Fri, 08 Dec 2017 15:33:54 +0100 swh-vault (0.0.3-1~swh1) unstable-swh; urgency=medium * version 0.0.3 -- Antoine Pietri Fri, 01 Dec 2017 15:31:34 +0100 swh-vault (0.0.2-1~swh1) unstable-swh; urgency=medium * version 0.0.2 -- Antoine Pietri Thu, 30 Nov 2017 15:50:43 +0100 swh-vault (0.0.1-1~swh1) unstable-swh; urgency=medium * Initial release * version 0.0.1 -- Antoine Pietri Mon, 13 Nov 2017 16:22:47 +0100 diff --git a/swh.vault.egg-info/PKG-INFO b/swh.vault.egg-info/PKG-INFO index 2624635..73e2986 100644 --- a/swh.vault.egg-info/PKG-INFO +++ b/swh.vault.egg-info/PKG-INFO @@ -1,36 +1,36 @@ Metadata-Version: 2.1 Name: swh.vault -Version: 0.6.3 +Version: 0.6.4 Summary: Software Heritage vault Home-page: https://forge.softwareheritage.org/diffusion/DVAU/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-vault Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-vault/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: graph License-File: LICENSE License-File: AUTHORS swh-vault 
========= User-facing service that allows to retrieve parts of the archive as self-contained bundles. See the [documentation](https://docs.softwareheritage.org/devel/swh-vault/index.html) for more details. diff --git a/swh/vault/tests/conftest.py b/swh/vault/tests/conftest.py index 3d394b2..e6d7efa 100644 --- a/swh/vault/tests/conftest.py +++ b/swh/vault/tests/conftest.py @@ -1,88 +1,93 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Any, Dict import pkg_resources.extern.packaging.version import pytest import yaml from swh.core.db.pytest_plugin import postgresql_fact from swh.storage.tests import SQL_DIR as STORAGE_SQL_DIR import swh.vault from swh.vault import get_vault os.environ["LC_ALL"] = "C.UTF-8" +# needed for directory tests on git-cloned repositories +# 022 is usually the default value, but some environments (eg. Debian builds) have +# a different one. +os.umask(0o022) + pytest_v = pkg_resources.get_distribution("pytest").parsed_version if pytest_v < pkg_resources.extern.packaging.version.parse("3.9"): @pytest.fixture def tmp_path(): import pathlib import tempfile with tempfile.TemporaryDirectory() as tmpdir: yield pathlib.Path(tmpdir) VAULT_SQL_DIR = os.path.join(os.path.dirname(swh.vault.__file__), "sql") postgres_vault = postgresql_fact( "postgresql_proc", dbname="vault", dump_files=f"{VAULT_SQL_DIR}/*.sql" ) postgres_storage = postgresql_fact( "postgresql_proc", dbname="storage", dump_files=f"{STORAGE_SQL_DIR}/*.sql" ) @pytest.fixture def swh_vault_config(postgres_vault, postgres_storage, tmp_path) -> Dict[str, Any]: tmp_path = str(tmp_path) return { "db": postgres_vault.dsn, "storage": { - "cls": "local", + "cls": "postgresql", "db": postgres_storage.dsn, "objstorage": { "cls": "pathslicing", "args": {"root": tmp_path, "slicing": "0:1/1:5",}, }, }, "cache": { "cls": "pathslicing", "args": {"root": tmp_path, "slicing": "0:1/1:5", "allow_delete": True}, }, "scheduler": {"cls": "remote", "url": "http://swh-scheduler:5008",}, } @pytest.fixture def swh_local_vault_config(swh_vault_config: Dict[str, Any]) -> Dict[str, Any]: return { "vault": {"cls": "local", **swh_vault_config}, "client_max_size": 1024 ** 3, } @pytest.fixture def swh_vault_config_file(swh_local_vault_config, monkeypatch, tmp_path): conf_path = os.path.join(str(tmp_path), "vault-server.yml") with open(conf_path, "w") as f: f.write(yaml.dump(swh_local_vault_config)) monkeypatch.setenv("SWH_CONFIG_FILENAME", conf_path) return conf_path @pytest.fixture def swh_vault(swh_vault_config): return get_vault("local", **swh_vault_config) @pytest.fixture def swh_storage(swh_vault): return swh_vault.storage diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py index f917d42..aabaa1d 100644 --- a/swh/vault/tests/test_cookers.py +++ b/swh/vault/tests/test_cookers.py @@ -1,786 +1,786 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import contextlib import datetime import glob import gzip import io import os import pathlib import shutil import subprocess import tarfile import tempfile import unittest import unittest.mock import dulwich.fastexport import dulwich.index import 
dulwich.objects import dulwich.porcelain import dulwich.repo import pytest from swh.loader.git.from_disk import GitLoaderFromDisk from swh.model import from_disk, hashutil from swh.model.model import ( Directory, DirectoryEntry, Person, Revision, RevisionType, TimestampWithTimezone, ) from swh.vault.cookers import DirectoryCooker, GitBareCooker, RevisionGitfastCooker from swh.vault.tests.vault_testing import hash_content from swh.vault.to_disk import HIDDEN_MESSAGE, SKIPPED_MESSAGE class TestRepo: """A tiny context manager for a test git repository, with some utility functions to perform basic git stuff. """ def __init__(self, repo_dir=None): self.repo_dir = repo_dir def __enter__(self): if self.repo_dir: self.tmp_dir = None self.repo = dulwich.repo.Repo(self.repo_dir) else: self.tmp_dir = tempfile.TemporaryDirectory(prefix="tmp-vault-repo-") self.repo_dir = self.tmp_dir.__enter__() self.repo = dulwich.repo.Repo.init(self.repo_dir) self.author_name = b"Test Author" self.author_email = b"test@softwareheritage.org" self.author = b"%s <%s>" % (self.author_name, self.author_email) self.base_date = 258244200 self.counter = 0 return pathlib.Path(self.repo_dir) def __exit__(self, exc, value, tb): if self.tmp_dir is not None: self.tmp_dir.__exit__(exc, value, tb) self.repo_dir = None def checkout(self, rev_sha): rev = self.repo[rev_sha] dulwich.index.build_index_from_tree( - self.repo_dir, self.repo.index_path(), self.repo.object_store, rev.tree + str(self.repo_dir), self.repo.index_path(), self.repo.object_store, rev.tree ) def git_shell(self, *cmd, stdout=subprocess.DEVNULL, **kwargs): name = self.author_name email = self.author_email date = "%d +0000" % (self.base_date + self.counter) env = { # Set git commit format "GIT_AUTHOR_NAME": name, "GIT_AUTHOR_EMAIL": email, "GIT_AUTHOR_DATE": date, "GIT_COMMITTER_NAME": name, "GIT_COMMITTER_EMAIL": email, "GIT_COMMITTER_DATE": date, # Ignore all the system-wide and user configurations "GIT_CONFIG_NOSYSTEM": "1", "HOME": str(self.tmp_dir), "XDG_CONFIG_HOME": str(self.tmp_dir), } kwargs.setdefault("env", {}).update(env) subprocess.check_call( ("git", "-C", self.repo_dir) + cmd, stdout=stdout, **kwargs ) def commit(self, message="Commit test\n", ref=b"HEAD"): """Commit the current working tree in a new commit with message on the branch 'ref'. At the end of the commit, the reference should stay the same and the index should be clean. """ paths = [ os.path.relpath(path, self.repo_dir) for path in glob.glob(self.repo_dir + "/**/*", recursive=True) ] self.repo.stage(paths) message = message.encode() + b"\n" ret = self.repo.do_commit( message=message, committer=self.author, commit_timestamp=self.base_date + self.counter, commit_timezone=0, ref=ref, ) self.counter += 1 # committing on another branch leaves # dangling files in index if ref != b"HEAD": # XXX this should work (but does not) # dulwich.porcelain.reset(self.repo, 'hard') self.git_shell("reset", "--hard", "HEAD") return ret def merge(self, parent_sha_list, message="Merge branches."): self.git_shell( "merge", "--allow-unrelated-histories", "-m", message, *[p.decode() for p in parent_sha_list], ) self.counter += 1 return self.repo.refs[b"HEAD"] def print_debug_graph(self, reflog=False): args = ["log", "--all", "--graph", "--decorate"] if reflog: args.append("--reflog") self.git_shell(*args, stdout=None) @pytest.fixture def git_loader(swh_storage,): """Instantiate a Git Loader using the storage instance as storage. 
""" def _create_loader(directory): return GitLoaderFromDisk( swh_storage, "fake_origin", directory=directory, visit_date=datetime.datetime.now(datetime.timezone.utc), ) return _create_loader @contextlib.contextmanager def cook_extract_directory_dircooker(storage, obj_id, fsck=True): """Context manager that cooks a directory and extract it.""" backend = unittest.mock.MagicMock() backend.storage = storage cooker = DirectoryCooker("directory", obj_id, backend=backend, storage=storage) cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) with tempfile.TemporaryDirectory(prefix="tmp-vault-extract-") as td: with tarfile.open(fileobj=cooker.fileobj, mode="r") as tar: tar.extractall(td) yield pathlib.Path(td) / hashutil.hash_to_hex(obj_id) cooker.storage = None @contextlib.contextmanager def cook_extract_directory_gitfast(storage, obj_id, fsck=True): """Context manager that cooks a revision containing a directory and extract it, using RevisionGitfastCooker""" test_repo = TestRepo() with test_repo as p: date = TimestampWithTimezone.from_datetime( datetime.datetime.now(datetime.timezone.utc) ) revision = Revision( directory=obj_id, message=b"dummy message", author=Person.from_fullname(b"someone"), committer=Person.from_fullname(b"someone"), date=date, committer_date=date, type=RevisionType.GIT, synthetic=False, ) storage.revision_add([revision]) with cook_stream_revision_gitfast(storage, revision.id) as stream, test_repo as p: processor = dulwich.fastexport.GitImportProcessor(test_repo.repo) processor.import_stream(stream) test_repo.checkout(b"HEAD") shutil.rmtree(p / ".git") yield p @contextlib.contextmanager def cook_extract_directory_git_bare( storage, obj_id, fsck=True, direct_objstorage=False ): """Context manager that cooks a revision and extract it, using GitBareCooker""" backend = unittest.mock.MagicMock() backend.storage = storage # Cook the object cooker = GitBareCooker( "directory", obj_id, backend=backend, storage=storage, objstorage=storage.objstorage if direct_objstorage else None, ) cooker.use_fsck = fsck # Some tests try edge-cases that git-fsck rejects cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) # Extract it with tempfile.TemporaryDirectory(prefix="tmp-vault-extract-") as td: with tarfile.open(fileobj=cooker.fileobj, mode="r") as tar: tar.extractall(td) # Clone it with Dulwich with tempfile.TemporaryDirectory(prefix="tmp-vault-clone-") as clone_dir: clone_dir = pathlib.Path(clone_dir) subprocess.check_call( [ "git", "clone", os.path.join(td, f"swh:1:dir:{obj_id.hex()}.git"), clone_dir, ] ) shutil.rmtree(clone_dir / ".git") yield clone_dir @pytest.fixture( scope="module", params=[ cook_extract_directory_dircooker, cook_extract_directory_gitfast, cook_extract_directory_git_bare, ], ) def cook_extract_directory(request): """A fixture that is instantiated as either cook_extract_directory_dircooker or cook_extract_directory_git_bare.""" return request.param @contextlib.contextmanager def cook_stream_revision_gitfast(storage, obj_id): """Context manager that cooks a revision and stream its fastexport.""" backend = unittest.mock.MagicMock() backend.storage = storage cooker = RevisionGitfastCooker( "revision_gitfast", obj_id, backend=backend, storage=storage ) cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) fastexport_stream = gzip.GzipFile(fileobj=cooker.fileobj) yield fastexport_stream cooker.storage = None 
@contextlib.contextmanager def cook_extract_revision_gitfast(storage, obj_id, fsck=True): """Context manager that cooks a revision and extract it, using RevisionGitfastCooker""" test_repo = TestRepo() with cook_stream_revision_gitfast(storage, obj_id) as stream, test_repo as p: processor = dulwich.fastexport.GitImportProcessor(test_repo.repo) processor.import_stream(stream) yield test_repo, p @contextlib.contextmanager def cook_extract_revision_git_bare(storage, obj_id, fsck=True): """Context manager that cooks a revision and extract it, using GitBareCooker""" backend = unittest.mock.MagicMock() backend.storage = storage # Cook the object cooker = GitBareCooker("revision", obj_id, backend=backend, storage=storage) cooker.use_fsck = fsck # Some tests try edge-cases that git-fsck rejects cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) # Extract it with tempfile.TemporaryDirectory(prefix="tmp-vault-extract-") as td: with tarfile.open(fileobj=cooker.fileobj, mode="r") as tar: tar.extractall(td) # Clone it with Dulwich with tempfile.TemporaryDirectory(prefix="tmp-vault-clone-") as clone_dir: clone_dir = pathlib.Path(clone_dir) subprocess.check_call( [ "git", "clone", os.path.join(td, f"swh:1:rev:{obj_id.hex()}.git"), clone_dir, ] ) test_repo = TestRepo(clone_dir) with test_repo: yield test_repo, clone_dir @pytest.fixture( scope="module", params=[cook_extract_revision_gitfast, cook_extract_revision_git_bare], ) def cook_extract_revision(request): """A fixture that is instantiated as either cook_extract_revision_gitfast or cook_extract_revision_git_bare.""" return request.param TEST_CONTENT = ( " test content\n" "and unicode \N{BLACK HEART SUIT}\n" " and trailing spaces " ) TEST_EXECUTABLE = b"\x42\x40\x00\x00\x05" class TestDirectoryCooker: def test_directory_simple(self, git_loader, cook_extract_directory): repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) (rp / "executable").write_bytes(TEST_EXECUTABLE) (rp / "executable").chmod(0o755) (rp / "link").symlink_to("file") (rp / "dir1/dir2").mkdir(parents=True) (rp / "dir1/dir2/file").write_text(TEST_CONTENT) c = repo.commit() loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) with cook_extract_directory(loader.storage, obj_id) as p: assert (p / "file").stat().st_mode == 0o100644 assert (p / "file").read_text() == TEST_CONTENT assert (p / "executable").stat().st_mode == 0o100755 assert (p / "executable").read_bytes() == TEST_EXECUTABLE assert (p / "link").is_symlink() assert os.readlink(str(p / "link")) == "file" assert (p / "dir1/dir2/file").stat().st_mode == 0o100644 assert (p / "dir1/dir2/file").read_text() == TEST_CONTENT directory = from_disk.Directory.from_disk(path=bytes(p)) assert obj_id_hex == hashutil.hash_to_hex(directory.hash) def test_directory_filtered_objects(self, git_loader, cook_extract_directory): repo = TestRepo() with repo as rp: file_1, id_1 = hash_content(b"test1") file_2, id_2 = hash_content(b"test2") file_3, id_3 = hash_content(b"test3") (rp / "file").write_bytes(file_1) (rp / "hidden_file").write_bytes(file_2) (rp / "absent_file").write_bytes(file_3) c = repo.commit() loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) # FIXME: storage.content_update() should be changed to allow things # like that with loader.storage.get_db().transaction() as cur: cur.execute( """update content set status = 
'visible' where sha1 = %s""", (id_1,), ) cur.execute( """update content set status = 'hidden' where sha1 = %s""", (id_2,), ) cur.execute( """ insert into skipped_content (sha1, sha1_git, sha256, blake2s256, length, reason) select sha1, sha1_git, sha256, blake2s256, length, 'no reason' from content where sha1 = %s """, (id_3,), ) cur.execute("delete from content where sha1 = %s", (id_3,)) with cook_extract_directory(loader.storage, obj_id) as p: assert (p / "file").read_bytes() == b"test1" assert (p / "hidden_file").read_bytes() == HIDDEN_MESSAGE assert (p / "absent_file").read_bytes() == SKIPPED_MESSAGE def test_directory_bogus_perms(self, git_loader, cook_extract_directory): # Some early git repositories have 664/775 permissions... let's check # if all the weird modes are properly normalized in the directory # cooker. repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) (rp / "file").chmod(0o664) (rp / "executable").write_bytes(TEST_EXECUTABLE) (rp / "executable").chmod(0o775) (rp / "wat").write_text(TEST_CONTENT) (rp / "wat").chmod(0o604) # Disable mode cleanup with unittest.mock.patch("dulwich.index.cleanup_mode", lambda mode: mode): c = repo.commit() # Make sure Dulwich didn't normalize the permissions itself. # (if it did, then the test can't check the cooker normalized them) tree_id = repo.repo[c].tree assert {entry.mode for entry in repo.repo[tree_id].items()} == { 0o100775, 0o100664, 0o100604, } # Disable mode checks with unittest.mock.patch("dulwich.objects.Tree.check", lambda self: None): loader = git_loader(str(rp)) loader.load() # Make sure swh-loader didn't normalize them either dir_entries = loader.storage.directory_ls(hashutil.bytehex_to_hash(tree_id)) assert {entry["perms"] for entry in dir_entries} == { 0o100664, 0o100775, 0o100604, } obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) with cook_extract_directory(loader.storage, obj_id) as p: assert (p / "file").stat().st_mode == 0o100644 assert (p / "executable").stat().st_mode == 0o100755 assert (p / "wat").stat().st_mode == 0o100644 @pytest.mark.parametrize("direct_objstorage", [True, False]) def test_directory_objstorage( self, swh_storage, git_loader, mocker, direct_objstorage ): """Like test_directory_simple, but using swh_objstorage directly, without going through swh_storage.content_get_data()""" repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) (rp / "executable").write_bytes(TEST_EXECUTABLE) (rp / "executable").chmod(0o755) (rp / "link").symlink_to("file") (rp / "dir1/dir2").mkdir(parents=True) (rp / "dir1/dir2/file").write_text(TEST_CONTENT) c = repo.commit() loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) # Set-up spies storage_content_get_data = mocker.patch.object( swh_storage, "content_get_data", wraps=swh_storage.content_get_data ) objstorage_content_batch = mocker.patch.object( swh_storage.objstorage, "get_batch", wraps=swh_storage.objstorage.get_batch ) with cook_extract_directory_git_bare( loader.storage, obj_id, direct_objstorage=direct_objstorage ) as p: assert (p / "file").stat().st_mode == 0o100644 assert (p / "file").read_text() == TEST_CONTENT assert (p / "executable").stat().st_mode == 0o100755 assert (p / "executable").read_bytes() == TEST_EXECUTABLE assert (p / "link").is_symlink() assert os.readlink(str(p / "link")) == "file" assert (p / "dir1/dir2/file").stat().st_mode == 0o100644 assert (p / "dir1/dir2/file").read_text() == TEST_CONTENT 
directory = from_disk.Directory.from_disk(path=bytes(p)) assert obj_id_hex == hashutil.hash_to_hex(directory.hash) if direct_objstorage: storage_content_get_data.assert_not_called() objstorage_content_batch.assert_called() else: storage_content_get_data.assert_called() objstorage_content_batch.assert_not_called() def test_directory_revision_data(self, swh_storage): target_rev = "0e8a3ad980ec179856012b7eecf4327e99cd44cd" dir = Directory( entries=( DirectoryEntry( name=b"submodule", type="rev", target=hashutil.hash_to_bytes(target_rev), perms=0o100644, ), ), ) swh_storage.directory_add([dir]) with cook_extract_directory_dircooker(swh_storage, dir.id, fsck=False) as p: assert (p / "submodule").is_symlink() assert os.readlink(str(p / "submodule")) == target_rev class TestRevisionCooker: def test_revision_simple(self, git_loader, cook_extract_revision): # # 1--2--3--4--5--6--7 # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) repo.commit("add file1") (rp / "file2").write_text(TEST_CONTENT) repo.commit("add file2") (rp / "dir1/dir2").mkdir(parents=True) (rp / "dir1/dir2/file").write_text(TEST_CONTENT) repo.commit("add dir1/dir2/file") (rp / "bin1").write_bytes(TEST_EXECUTABLE) (rp / "bin1").chmod(0o755) repo.commit("add bin1") (rp / "link1").symlink_to("file1") repo.commit("link link1 to file1") (rp / "file2").unlink() repo.commit("remove file2") (rp / "bin1").rename(rp / "bin") repo.commit("rename bin1 to bin") loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) with cook_extract_revision(loader.storage, obj_id) as (ert, p): ert.checkout(b"HEAD") assert (p / "file1").stat().st_mode == 0o100644 assert (p / "file1").read_text() == TEST_CONTENT assert (p / "link1").is_symlink() assert os.readlink(str(p / "link1")) == "file1" assert (p / "bin").stat().st_mode == 0o100755 assert (p / "bin").read_bytes() == TEST_EXECUTABLE assert (p / "dir1/dir2/file").read_text() == TEST_CONTENT assert (p / "dir1/dir2/file").stat().st_mode == 0o100644 assert ert.repo.refs[b"HEAD"].decode() == obj_id_hex def test_revision_two_roots(self, git_loader, cook_extract_revision): # # 1----3---4 # / # 2---- # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) c1 = repo.commit("Add file1") del repo.repo.refs[b"refs/heads/master"] # git update-ref -d HEAD (rp / "file2").write_text(TEST_CONTENT) repo.commit("Add file2") repo.merge([c1]) (rp / "file3").write_text(TEST_CONTENT) repo.commit("add file3") obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) loader = git_loader(str(rp)) loader.load() with cook_extract_revision(loader.storage, obj_id) as (ert, p): assert ert.repo.refs[b"HEAD"].decode() == obj_id_hex def test_revision_two_double_fork_merge(self, git_loader, cook_extract_revision): # # 2---4---6 # / / / # 1---3---5 # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) c1 = repo.commit("Add file1") repo.repo.refs[b"refs/heads/c1"] = c1 (rp / "file2").write_text(TEST_CONTENT) repo.commit("Add file2") (rp / "file3").write_text(TEST_CONTENT) c3 = repo.commit("Add file3", ref=b"refs/heads/c1") repo.repo.refs[b"refs/heads/c3"] = c3 repo.merge([c3]) (rp / "file5").write_text(TEST_CONTENT) c5 = repo.commit("Add file3", ref=b"refs/heads/c3") repo.merge([c5]) obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) loader = git_loader(str(rp)) loader.load() with cook_extract_revision(loader.storage, obj_id) 
as (ert, p): assert ert.repo.refs[b"HEAD"].decode() == obj_id_hex def test_revision_triple_merge(self, git_loader, cook_extract_revision): # # .---.---5 # / / / # 2 3 4 # / / / # 1---.---. # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) c1 = repo.commit("Commit 1") repo.repo.refs[b"refs/heads/b1"] = c1 repo.repo.refs[b"refs/heads/b2"] = c1 repo.commit("Commit 2") c3 = repo.commit("Commit 3", ref=b"refs/heads/b1") c4 = repo.commit("Commit 4", ref=b"refs/heads/b2") repo.merge([c3, c4]) obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) loader = git_loader(str(rp)) loader.load() with cook_extract_revision(loader.storage, obj_id) as (ert, p): assert ert.repo.refs[b"HEAD"].decode() == obj_id_hex def test_revision_filtered_objects(self, git_loader, cook_extract_revision): repo = TestRepo() with repo as rp: file_1, id_1 = hash_content(b"test1") file_2, id_2 = hash_content(b"test2") file_3, id_3 = hash_content(b"test3") (rp / "file").write_bytes(file_1) (rp / "hidden_file").write_bytes(file_2) (rp / "absent_file").write_bytes(file_3) repo.commit() obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) loader = git_loader(str(rp)) loader.load() # FIXME: storage.content_update() should be changed to allow things # like that with loader.storage.get_db().transaction() as cur: cur.execute( """update content set status = 'visible' where sha1 = %s""", (id_1,), ) cur.execute( """update content set status = 'hidden' where sha1 = %s""", (id_2,), ) cur.execute( """ insert into skipped_content (sha1, sha1_git, sha256, blake2s256, length, reason) select sha1, sha1_git, sha256, blake2s256, length, 'no reason' from content where sha1 = %s """, (id_3,), ) cur.execute("delete from content where sha1 = %s", (id_3,)) with cook_extract_revision(loader.storage, obj_id) as (ert, p): ert.checkout(b"HEAD") assert (p / "file").read_bytes() == b"test1" assert (p / "hidden_file").read_bytes() == HIDDEN_MESSAGE assert (p / "absent_file").read_bytes() == SKIPPED_MESSAGE def test_revision_null_fields(self, git_loader, cook_extract_revision): # Our schema doesn't enforce a lot of non-null revision fields. We need # to check these cases don't break the cooker. 
repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) c = repo.commit("initial commit") loader = git_loader(str(rp)) loader.load() repo.repo.refs[b"HEAD"].decode() dir_id_hex = repo.repo[c].tree.decode() dir_id = hashutil.hash_to_bytes(dir_id_hex) test_revision = Revision( message=b"", author=Person(name=None, email=None, fullname=b""), date=None, committer=Person(name=None, email=None, fullname=b""), committer_date=None, parents=(), type=RevisionType.GIT, directory=dir_id, metadata={}, synthetic=True, ) storage = loader.storage storage.revision_add([test_revision]) with cook_extract_revision(storage, test_revision.id, fsck=False) as (ert, p): ert.checkout(b"HEAD") assert (p / "file").stat().st_mode == 0o100644 def test_revision_revision_data(self, swh_storage): target_rev = "0e8a3ad980ec179856012b7eecf4327e99cd44cd" dir = Directory( entries=( DirectoryEntry( name=b"submodule", type="rev", target=hashutil.hash_to_bytes(target_rev), perms=0o100644, ), ), ) swh_storage.directory_add([dir]) rev = Revision( message=b"", author=Person(name=None, email=None, fullname=b""), date=None, committer=Person(name=None, email=None, fullname=b""), committer_date=None, parents=(), type=RevisionType.GIT, directory=dir.id, metadata={}, synthetic=True, ) swh_storage.revision_add([rev]) with cook_stream_revision_gitfast(swh_storage, rev.id) as stream: pattern = "M 160000 {} submodule".format(target_rev).encode() assert pattern in stream.read()
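# The "Fix tests when the umask is not 022" item in the v0.6.4 changelog refers to the
# os.umask(0o022) call added to swh/vault/tests/conftest.py in this diff. A minimal
# sketch, using only modules already imported at the top of this file, of why the mode
# assertions above (0o100644 / 0o100755) depend on the process umask; created_file_mode
# is an illustrative helper, not part of swh-vault.
def created_file_mode(umask: int) -> int:
    """Create a throwaway file under `umask` and return its permission bits."""
    old_umask = os.umask(umask)
    try:
        with tempfile.TemporaryDirectory() as d:
            path = pathlib.Path(d) / "file"
            path.write_text("x")
            return path.stat().st_mode & 0o777
    finally:
        os.umask(old_umask)

# created_file_mode(0o022) == 0o644, which is what the cooker tests expect for regular
# files; a stricter umask (the conftest comment cites Debian builds as an environment
# with a different default) gives e.g. 0o640 under 0o027, so the stat().st_mode checks
# on git-cloned/extracted trees would fail without forcing the umask in conftest.py.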