diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1a2dcf9c..b80dd837 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,57 +1,49 @@
repos:
-- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v2.4.0
-  hooks:
-  - id: trailing-whitespace
-  - id: check-json
-  - id: check-yaml
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.1.0
+    hooks:
+      - id: trailing-whitespace
+      - id: check-json
+      - id: check-yaml

-- repo: https://gitlab.com/pycqa/flake8
-  rev: 3.8.3
-  hooks:
-  - id: flake8
+  - repo: https://gitlab.com/pycqa/flake8
+    rev: 4.0.1
+    hooks:
+      - id: flake8

-- repo: https://github.com/codespell-project/codespell
-  rev: v1.16.0
-  hooks:
-  - id: codespell
-    exclude: TODO
-    args: [-L iff, -L gae]
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.1.0
+    hooks:
+      - id: codespell
+        name: Check source code spelling
+        args: [-L iff, -L gae, -L sur]
+        stages: [commit]
+      - id: codespell
+        name: Check commit message spelling
+        stages: [commit-msg]

-- repo: local
-  hooks:
-  - id: mypy
-    name: mypy
-    entry: mypy
-    args: [swh]
-    pass_filenames: false
-    language: system
-    types: [python]
+  - repo: local
+    hooks:
+      - id: mypy
+        name: mypy
+        entry: mypy
+        args: [swh]
+        pass_filenames: false
+        language: system
+        types: [python]
+      - id: check-bumped-dbversion
+        name: check-bumped-dbversion
+        files: 'sql/upgrades/.*\.sql'
+        entry: grep
+        args: ["insert into dbversion"]
+        language: system

-  - id: check-bumped-dbversion
-    name: check-bumped-dbversion
-    files: 'sql/upgrades/.*\.sql'
-    entry: grep
-    args: ['insert into dbversion']
-    language: system
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.10.1
+    hooks:
+      - id: isort

-- repo: https://github.com/PyCQA/isort
-  rev: 5.5.2
-  hooks:
-  - id: isort
-
-- repo: https://github.com/python/black
-  rev: 19.10b0
-  hooks:
-  - id: black
-
-# unfortunately, we are far from being able to enable this...
-#- repo: https://github.com/PyCQA/pydocstyle.git
-#  rev: 4.0.0
-#  hooks:
-#  - id: pydocstyle
-#    name: pydocstyle
-#    description: pydocstyle is a static analysis tool for checking compliance with Python docstring conventions.
-#    entry: pydocstyle --convention=google
-#    language: python
-#    types: [python]
+  - repo: https://github.com/python/black
+    rev: 19.10b0
+    hooks:
+      - id: black
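To try the updated hook set locally, a minimal sketch, assuming `pre-commit` is installed in the `swh` virtualenv (note that the new codespell commit-msg hook only fires if that hook type is installed explicitly):

```
(swh) :~/swh-storage$ pre-commit install --hook-type pre-commit --hook-type commit-msg
(swh) :~/swh-storage$ pre-commit run --all-files
```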
diff --git a/PKG-INFO b/PKG-INFO
index a4dc5adc..b0e720db 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,250 +1,250 @@
Metadata-Version: 2.1
Name: swh.storage
-Version: 0.43.1
+Version: 1.0.0
Summary: Software Heritage storage manager
Home-page: https://forge.softwareheritage.org/diffusion/DSTO/
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
Project-URL: Funding, https://www.softwareheritage.org/donate
Project-URL: Source, https://forge.softwareheritage.org/source/swh-storage
Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-storage/
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 5 - Production/Stable
Requires-Python: >=3.7
Description-Content-Type: text/markdown
Provides-Extra: testing
Provides-Extra: journal
License-File: LICENSE
License-File: AUTHORS

swh-storage
===========

Abstraction layer over the archive, allowing access to all stored source code
artifacts as well as their metadata.

See the
[documentation](https://docs.softwareheritage.org/devel/swh-storage/index.html)
for more details.

## Quick start

### Dependencies

The Python tests for this module include tests that cannot be run without a
local PostgreSQL database, so you need the PostgreSQL server executable on
your machine (no need to have a running PostgreSQL server). They also expect
a Cassandra server.

#### Debian-like host

```
$ sudo apt install libpq-dev postgresql-11 cassandra
```

#### Non Debian-like host

The tests expect the path to `cassandra` either to be left unspecified, in
which case it is looked up at `/usr/sbin/cassandra`, or to be specified
through the `SWH_CASSANDRA_BIN` environment variable.

Optionally, you can skip the Cassandra tests altogether:

```
(swh) :~/swh-storage$ tox -- -m 'not cassandra'
```
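For instance, a minimal sketch of pointing the tests at a custom Cassandra binary before running the suite (the path below is only an example):

```
(swh) :~/swh-storage$ export SWH_CASSANDRA_BIN=/opt/cassandra/bin/cassandra
(swh) :~/swh-storage$ tox
```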
### Installation

It is strongly recommended to use a virtualenv. In the following, we consider
you work in a virtualenv named `swh`. See the
[developer setup guide](https://docs.softwareheritage.org/devel/developer-setup.html#developer-setup)
for more details on how to set up a working environment.

You can install the package directly from
[pypi](https://pypi.org/p/swh.storage):

```
(swh) :~$ pip install swh.storage
[...]
```

Or from sources:

```
(swh) :~$ git clone https://forge.softwareheritage.org/source/swh-storage.git
[...]
(swh) :~$ cd swh-storage
(swh) :~/swh-storage$ pip install .
[...]
```

Then you can check it's properly installed:

```
(swh) :~$ swh storage --help
Usage: swh storage [OPTIONS] COMMAND [ARGS]...

  Software Heritage Storage tools.

Options:
  -h, --help  Show this message and exit.

Commands:
  rpc-serve  Software Heritage Storage RPC server.
```

## Tests

The best way of running Python tests for this module is to use
[tox](https://tox.readthedocs.io/).

```
(swh) :~$ pip install tox
```

### tox

From the sources directory, simply use tox:

```
(swh) :~/swh-storage$ tox
[...]
========= 315 passed, 6 skipped, 15 warnings in 40.86 seconds ==========
_______________________________ summary ________________________________
flake8: commands succeeded
py3: commands succeeded
congratulations :)
```

Note: it is possible to set the `JAVA_HOME` environment variable to specify
the version of the JVM to be used by Cassandra. For example, at the time of
writing this, Cassandra does not support Java 14, so one may want to use
Java 11 instead:

```
(swh) :~/swh-storage$ export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
(swh) :~/swh-storage$ tox
[...]
```

## Development

The storage server can be started locally. It requires a configuration file
and a running PostgreSQL database.

### Sample configuration

A typical `storage.yml` configuration file is:

```
storage:
  cls: postgresql
  db: "dbname=softwareheritage-dev user= password="
  objstorage:
    cls: pathslicing
    root: /tmp/swh-storage/
    slicing: 0:2/2:4/4:6
```

This configuration uses:

- a local storage instance whose db connection points to the local
  `softwareheritage-dev` database,
- a local objstorage instance whose:
  - `root` path is `/tmp/swh-storage`,
  - slicing scheme is `0:2/2:4/4:6`: the content identifier (sha1) is sliced
    into three directory levels of two hex characters each, and the file
    holding the raw content is named after the complete hash. For example,
    the content `00062f8bd330715c4f819373653d97b3cd34394c` will be stored at
    `00/06/2f/00062f8bd330715c4f819373653d97b3cd34394c`.

Note that the `root` path should exist on disk before starting the server.

### Starting the storage server

If the python package has been properly installed (e.g. in a virtualenv), you
should be able to use the command:

```
(swh) :~/swh-storage$ swh storage rpc-serve storage.yml
```

This runs a local swh-storage API server on port 5002.

```
(swh) :~/swh-storage$ curl http://127.0.0.1:5002
Software Heritage storage server

You have reached the Software Heritage storage server.
See its documentation and API for more information

```

### And then what?

In your upper layer
([loader-git](https://forge.softwareheritage.org/source/swh-loader-git/),
[loader-svn](https://forge.softwareheritage.org/source/swh-loader-svn/),
etc...), you can define a remote storage with this snippet of yaml
configuration.

```
storage:
  cls: remote
  url: http://localhost:5002/
```

You could directly define a postgresql storage with the following snippet:

```
storage:
  cls: postgresql
  db: service=swh-dev
  objstorage:
    cls: pathslicing
    root: /home/storage/swh-storage/
    slicing: 0:2/2:4/4:6
```

## Cassandra

As an alternative to PostgreSQL, swh-storage can use Cassandra as a database
backend. It can be used like this:

```
storage:
  cls: cassandra
  hosts:
    - localhost
  objstorage:
    cls: pathslicing
    root: /home/storage/swh-storage/
    slicing: 0:2/2:4/4:6
```

The Cassandra swh-storage implementation supports both Cassandra >= 4.0-alpha2
and ScyllaDB >= 4.4 (and possibly earlier versions, but this is untested).

While the main code supports both transparently, running tests or configuring
the schema requires specific code when using ScyllaDB, enabled by setting the
`SWH_USE_SCYLLADB=1` environment variable.
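For example, a minimal sketch of running only the Cassandra-backed tests against a local ScyllaDB under this switch (assuming ScyllaDB is installed, and that the `cassandra` pytest marker selects these tests, as suggested by the `-m 'not cassandra'` invocation above):

```
(swh) :~/swh-storage$ export SWH_USE_SCYLLADB=1
(swh) :~/swh-storage$ tox -- -m cassandra
```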
diff --git a/debian/changelog b/debian/changelog
index c862404c..2cf954dc 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,2822 +1,2825 @@
-swh-storage (0.43.1-1~swh1~bpo10+1) buster-swh; urgency=medium
+swh-storage (1.0.0-1~swh1) unstable-swh; urgency=medium

-  * Rebuild for buster-swh
+  * New upstream release 1.0.0 - (tagged by David Douard
+    on 2022-02-24 11:32:18 +0100)
+  * Upstream changes: - v1.0.0 - add support for `swh db
+    upgrade`, aka updated for swh core 2.0.0

- -- Software Heritage autobuilder (on jenkins-debian1) Tue, 08 Feb 2022 09:45:21 +0000
+ -- Software Heritage autobuilder (on jenkins-debian1) Thu, 24 Feb 2022 11:13:57 +0000

swh-storage (0.43.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.43.1 - (tagged by Valentin Lorentz on 2022-02-08 10:21:59 +0100) * Upstream changes: - v0.43.1 - * Add typing to revision_walker.py and make the state a dataclass - * revision_walker: Add support for ignore_displayname. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 08 Feb 2022 09:31:33 +0000 swh-storage (0.43.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.43.0 - (tagged by Nicolas Dandrimont on 2022-02-02 18:46:15 +0100) * Upstream changes: - Release swh.storage 0.43.0 - Add a displayname field to the person table in PostgreSQL - Use the displayname field in returned results for revision_get, - revision_log and release_get -- Software Heritage autobuilder (on jenkins-debian1) Wed, 02 Feb 2022 17:56:33 +0000 swh-storage (0.42.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.42.0 - (tagged by Valentin Lorentz on 2022-01-25 16:44:35 +0100) * Upstream changes: - v0.42.0 - * Remove 'offset' and 'negative_utc' for the TimestampWithTimezone constructor call - * Stop using the deprecated 'TimestampWithTimezone.offset' attribute - * Fix directory_add to actually insert the manifest + add directory_get_raw_manifest -- Software Heritage autobuilder (on jenkins-debian1) Tue, 25 Jan 2022 15:54:46 +0000 swh-storage (0.41.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.41.1 - (tagged by David Douard on 2022-01-04 15:46:41 +0100) * Upstream changes: - v0.41.1 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 04 Jan 2022 14:55:33 +0000 swh-storage (0.41.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.41.0 - (tagged by Nicolas Dandrimont on 2021-12-22 15:24:31 +0100) * Upstream changes: - Release swh.storage v0.41.0 - add support for storing odd-shaped git objects enabled in swh.model 4.0.0 - drop workarounds for old versions of tenacity -- Software Heritage autobuilder (on jenkins-debian1) Wed, 22 Dec 2021 15:22:18 +0000 swh-storage (0.40.0-2~swh1) unstable-swh; urgency=medium * Update dependencies in d/control. -- David Douard Tue, 16 Nov 2021 14:37:20 +0100 swh-storage (0.40.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.40.0 - (tagged by David Douard on 2021-11-16 10:11:25 +0100) * Upstream changes: - v0.40.0 - Add support for a redis-based reporting for invalid mirrorred objects - Remove now useless fixers in storage/fixer.py - Add a new --type option to 'swh strorage repay' - Update extrinsic metadata specs -- Software Heritage autobuilder (on jenkins-debian1) Tue, 16 Nov 2021 09:45:39 +0000 swh-storage (0.39.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.39.0 - (tagged by Antoine Lambert on 2021-10-28 17:09:40 +0200) * Upstream changes: - version 0.39.0 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 29 Oct 2021 09:17:36 +0000 swh-storage (0.38.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.38.0 - (tagged by Antoine R.
Dumont (@ardumont) on 2021-10-11 13:28:27 +0200) * Upstream changes: - v0.38.0 - buffer: add some debug logging for number of objects sent - buffer: add a threshold for the estimated size of revision and release batches - buffer: add a threshold for the number of revision parents in one batch - buffer: add a threshold for the number of directory entries in one batch - filter: add filtering for release_add - filter: do not call the underlying functions if there's nothing to add - buffer: Ensure that we don't send data from empty buffers -- Software Heritage autobuilder (on jenkins-debian1) Mon, 11 Oct 2021 14:58:23 +0000 swh-storage (0.37.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.37.1 - (tagged by David Douard on 2021-09-29 11:31:33 +0200) * Upstream changes: - v0.37.1 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 29 Sep 2021 10:12:31 +0000 swh-storage (0.37.0-1~swh2) unstable-swh; urgency=medium * Bump new release -- Antoine R. Dumont (@ardumont) Thu, 16 Sep 2021 09:51:51 +0200 swh-storage (0.37.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.37.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-09-15 18:44:19 +0200) * Upstream changes: - v0.37.0 - Allow filtering extids per extid_version/extid_type when reading - migrate_extrinsic_metadata: Fix edge cases (missing f- stringification, remaining pypi - issues, ...) - cassandra: Make directory_ls fetch contents in batch instead of one-by-one - cassandra: Add option to select (hopefully) more efficient batch insertion algos - cassandra: Remove stat_counters. - cassandra: generate statsd metrics on method calls - content_get: Fetch rows concurrently - directory_entry_add_batch: Remove the temporary prepared statement entirely - directory_entry_add_batch: Reduce churn of prepared statements - postgresql: Fix a column order mismatch between the query and object builder - Add counting storage proxy -- Software Heritage autobuilder (on jenkins-debian1) Thu, 16 Sep 2021 06:42:23 +0000 swh-storage (0.36.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.36.0 - (tagged by Vincent SELLIER on 2021-08-24 16:51:07 +0200) * Upstream changes: - v0.36.0 - changelog: - Add cvs as supported revision_type - Add test for origin_visit_get_latest in presence of mismatched id and date orders - cassandra: Bump next_visit_id when origin_visit_add is called by a replayer - cassandra: Make content_missing query in batches - backfill: add extra where clause to use the right index for extid requests -- Software Heritage autobuilder (on jenkins-debian1) Tue, 24 Aug 2021 15:01:32 +0000 swh-storage (0.35.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.35.1 - (tagged by Valentin Lorentz on 2021-08-20 11:53:33 +0200) * Upstream changes: - v0.35.1 - * cassandra: Fix crash when using _missing() functions with more than 100 ids with ScyllaDB. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 20 Aug 2021 10:01:16 +0000 swh-storage (0.35.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.35.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2021-07-28 10:36:09 +0200) * Upstream changes: - v0.35.0 - Implement storage of the ExtID.extid_version field -- Software Heritage autobuilder (on jenkins-debian1) Wed, 28 Jul 2021 08:43:06 +0000 swh-storage (0.34.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.34.0 - (tagged by Vincent SELLIER on 2021-07-07 18:22:00 +0200) * Upstream changes: - v0.34.0 - cassandra: allow to configure the consistency level -- Software Heritage autobuilder (on jenkins-debian1) Wed, 07 Jul 2021 16:58:42 +0000 swh-storage (0.33.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.33.0 - (tagged by Valentin Lorentz on 2021-07-05 16:48:16 +0200) * Upstream changes: - v0.33.0 - * Add endpoint raw_extrinsic_metadata_get_authorities -- Software Heritage autobuilder (on jenkins-debian1) Mon, 05 Jul 2021 15:00:12 +0000 swh-storage (0.32.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.32.0 - (tagged by Vincent SELLIER on 2021-06-28 15:35:44 +0200) * Upstream changes: - v0.32.0 - * cassandra: Add support for non-ASCII origin 'URLs'. -- Software Heritage autobuilder (on jenkins-debian1) Mon, 28 Jun 2021 16:20:21 +0000 swh-storage (0.31.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.31.0 - (tagged by Valentin Lorentz on 2021-06-25 11:17:50 +0200) * Upstream changes: - v0.31.0 - * cassandra: Add partial support for ScyllaDB - * mypy: Fix errors with release >= v0.900 (but breaks older mypy versions) - * Add endpoints to access REMD by id -- Software Heritage autobuilder (on jenkins-debian1) Fri, 25 Jun 2021 09:26:09 +0000 swh-storage (0.30.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.30.1 - (tagged by Antoine R. Dumont (@ardumont) on 2021-05-21 10:09:02 +0200) * Upstream changes: - v0.30.1 - Finalize the config "local" deprecation in favor of "postgresql" - tests: Make test parameters order deterministic, so they don't crash pytest-xdist - test_cassandra: Improve error when the process is started but not listening -- Software Heritage autobuilder (on jenkins-debian1) Fri, 21 May 2021 08:22:33 +0000 swh-storage (0.30.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.30.0 - (tagged by David Douard on 2021-05-18 16:34:25 +0200) * Upstream changes: - v0.30.0 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 18 May 2021 14:45:21 +0000 swh-storage (0.29.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.29.1 - (tagged by Nicolas Dandrimont on 2021-05-14 18:31:52 +0200) * Upstream changes: - Release swh.storage 0.29.1 - Add missing db migration -- Software Heritage autobuilder (on jenkins-debian1) Fri, 14 May 2021 16:59:42 +0000 swh-storage (0.29.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.29.0 - (tagged by Valentin Lorentz on 2021-05-11 15:04:58 +0200) * Upstream changes: - v0.29.0 - * Make the TenaciousProxyStorage retry when a single object add fails - * Move all proxy storages in swh/storage/proxies/ - * Deprecate the "local" storage cls in favor of "postgresql" - * cassandra: Add tests checking directory_add and snapshot_add are atomic. 
- * Add endpoint directory_get_entries, to quickly list a directory's entries - * content_get: Add support for queries by sha1_git -- Software Heritage autobuilder (on jenkins-debian1) Tue, 11 May 2021 13:12:42 +0000 swh-storage (0.28.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.28.0 - (tagged by Valentin Lorentz on 2021-05-06 15:52:03 +0200) * Upstream changes: - v0.28.0 - * Normalize all Storage.xxx_add() methods to return a summary - * cassandra: Add 'check_missing' option, to allow updating objects - * cassandra: Add a test of a 'complex' migration, with a PK update - * Add a new TenaciousProxyStorage - * Make postgresql's origin_add not raise an error in case of conflict - * Stop storing authority/fetcher metadata. - * tenacious: Document potential issues about objects being dropped - * Use swh.core 0.14 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 May 2021 14:06:51 +0000 swh-storage (0.27.4-1~swh1) unstable-swh; urgency=medium * New upstream release 0.27.4 - (tagged by Antoine Lambert on 2021-04-29 14:38:49 +0200) * Upstream changes: - version 0.27.4 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 29 Apr 2021 13:04:46 +0000 swh-storage (0.27.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.27.3 - (tagged by Antoine Lambert on 2021-04-09 14:59:36 +0200) * Upstream changes: - version 0.27.3 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 09 Apr 2021 13:06:58 +0000 swh-storage (0.27.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.27.2 - (tagged by David Douard on 2021-04-07 15:06:41 +0200) * Upstream changes: - v0.27.2 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 08 Apr 2021 08:05:43 +0000 swh-storage (0.27.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.27.1 - (tagged by Valentin Lorentz on 2021-03-30 17:47:03 +0200) * Upstream changes: - v0.27.1 - * buffer: Add support for 'extid' -- Software Heritage autobuilder (on jenkins-debian1) Tue, 30 Mar 2021 15:59:01 +0000 swh-storage (0.27.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.27.0 - (tagged by Valentin Lorentz on 2021-03-29 14:33:24 +0200) * Upstream changes: - v0.27.0 - * origin_visit_status_add: Fix inconsistent/incorrect errors when type is None and visit is missing. - * extid: remove unicity on (extid_type, extid) and (target_type, target) -- Software Heritage autobuilder (on jenkins-debian1) Mon, 29 Mar 2021 12:44:14 +0000 swh-storage (0.26.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.26.0 - (tagged by Nicolas Dandrimont on 2021-03-22 14:44:35 +0100) * Upstream changes: - Release swh.storage v0.26.0 - Move raw_extrinsic_metadata deduplication to use a new id column. 
-- Software Heritage autobuilder (on jenkins-debian1) Mon, 22 Mar 2021 21:53:39 +0000 swh-storage (0.25.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.25.0 - (tagged by Antoine Lambert on 2021-03-18 13:55:10 +0100) * Upstream changes: - version 0.25.0 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 18 Mar 2021 13:02:02 +0000 swh-storage (0.24.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.24.1 - (tagged by Valentin Lorentz on 2021-03-04 23:32:36 +0100) * Upstream changes: - v0.24.1 - * tests: Drop hypothesis < 6 requirement - * Remove the remaining references to the deprecated SWHID class - * postgresql: Ensure a minimum limit for the snapshot branches query -- Software Heritage autobuilder (on jenkins-debian1) Thu, 04 Mar 2021 22:39:03 +0000 swh-storage (0.24.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.24.0 - (tagged by Valentin Lorentz on 2021-03-02 10:00:23 +0100) * Upstream changes: - v0.24.0 - * storage_tests: recompute ids when evolving RawExtrinsicMetadata objects. - * RawExtrinsicMetadata: update to use the API in swh-model 1.0.0 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 02 Mar 2021 09:11:15 +0000 swh-storage (0.23.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.23.2 - (tagged by Antoine Lambert on 2021-02-19 11:47:03 +0100) * Upstream changes: - version 0.23.2 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 19 Feb 2021 10:58:50 +0000 swh-storage (0.23.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.23.1 - (tagged by Antoine R. Dumont (@ardumont) on 2021-02-16 17:19:00 +0100) * Upstream changes: - v0.23.1 - Switch anonymized replayer test to use pytest parametrization -- Software Heritage autobuilder (on jenkins-debian1) Tue, 16 Feb 2021 16:28:25 +0000 swh-storage (0.23.0-1~swh2) unstable-swh; urgency=medium * Fix dependency issue -- Antoine R. Dumont (@ardumont) Tue, 16 Feb 2021 14:34:57 +0100 swh-storage (0.23.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.23.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-02-15 15:20:21 +0100) * Upstream changes: - v0.23.0 - storage: Refactor OriginVisitStatus instantiation - db: Unify sql joins on origin_visit_status using "USING" - storage.postgresql: Use origin_visit_status.type value as source - test_replay: Fix hang since confluent-kafka 1.6 release - postgresql: Fix dbversion() to return the max version instead of a random one. - buffer: ensure objects are flushed in topological order - Return an accurate summary from buffer's flush() method - buffer: add support for snapshots - buffer: add type annotations for tests -- Software Heritage autobuilder (on jenkins-debian1) Mon, 15 Feb 2021 14:39:04 +0000 swh-storage (0.22.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.22.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2021-02-03 12:09:29 +0100) * Upstream changes: - v0.22.0 - storage: Make origin_get_latest_visit_status return OriginVisitStatus - storage: Change origin_visit_status_get_random interface to return visit_status - Write introduction to swh-storage -- Software Heritage autobuilder (on jenkins-debian1) Wed, 03 Feb 2021 11:15:27 +0000 swh-storage (0.21.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.21.1 - (tagged by Vincent SELLIER on 2021-01-28 14:11:26 +0100) * Upstream changes: - v0.21.1 - * Correctly return origin_visit_status.type value everywhere -- Software Heritage autobuilder (on jenkins-debian1) Thu, 28 Jan 2021 13:19:24 +0000 swh-storage (0.21.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.21.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-01-20 15:42:40 +0100) * Upstream changes: - v0.21.0 - db: Allow new status values not_found, failed to OriginVisitStatus -- Software Heritage autobuilder (on jenkins-debian1) Wed, 20 Jan 2021 14:52:20 +0000 swh-storage (0.20.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.20.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-01-20 10:24:00 +0100) * Upstream changes: - v0.20.0 - storage: Add persistence of the field OriginVisitStatus.type - backfiller: Add type to the origin_visit_status topic - tests: Make test_content_add_race fail for the right reason. -- Software Heritage autobuilder (on jenkins-debian1) Wed, 20 Jan 2021 09:29:54 +0000 swh-storage (0.19.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.19.0 - (tagged by Vincent SELLIER on 2021-01-14 11:09:17 +0100) * Upstream changes: - v0.19.0 - * 2021-01-12 Adapt cassandra storage to ignore the new OriginVisitStatus.type field - * 2021- 01-08 Allow to use the JAVA_HOME environment for cassandra tests - * 2021-01-13 Enforce hypothesis <6 to prevent test breakage - * 2021-01-08 Make the CREATE_TABLES_QUERIES in cassandra/schema.py an explicit list - * 2020-12-18 Add a cli section in the doc - * 2020-11-24 storage.backfill: Allow cli run for origin_visit_status as well - * 2020-11-24 conftest: Reference swh.core.db.pytest_plugin -- Software Heritage autobuilder (on jenkins-debian1) Thu, 14 Jan 2021 10:18:31 +0000 swh-storage (0.18.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.18.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-11-23 14:46:41 +0100) * Upstream changes: - v0.18.0 - requirements-test.txt: Drop no longer needed pytest-postgresql requirement - backfill: Reverse flawed logic in SnapshotBranch generation - migrate_extrinsic_metadata: don't crash when deb revisions aren't referenced by any snapshot -- Software Heritage autobuilder (on jenkins-debian1) Mon, 23 Nov 2020 13:52:32 +0000 swh-storage (0.17.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.17.2 - (tagged by Nicolas Dandrimont on 2020-11-13 11:56:37 +0100) * Upstream changes: - Release swh.storage 0.17.2 - Future- proof get_journal_writer by setting the value_sanitizer argument - migrate_extrinsic_metadata improvements - backfill: only flush on every batch -- Software Heritage autobuilder (on jenkins-debian1) Fri, 13 Nov 2020 11:05:35 +0000 swh-storage (0.17.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.17.1 - (tagged by Antoine Lambert on 2020-11-05 13:50:35 +0100) * Upstream changes: - version 0.17.1 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 05 Nov 2020 12:56:53 +0000 swh-storage (0.17.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.17.0 - (tagged by Nicolas Dandrimont on 2020-11-03 18:09:53 +0100) * Upstream changes: - Release swh.storage v0.17.0 - Migrate all raw extrinsic metadata attributes from id to target - Add an `algos` function to resolve branch aliases - Prepare updates to make swh.journal more generic - Improve api server initialization - Various updates to the migrate_extrinsic_metadata script, notably writing - most metadata on directories instead of revisions -- Software Heritage autobuilder (on jenkins-debian1) Tue, 03 Nov 2020 17:20:45 +0000 swh-storage (0.16.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.16.0 - (tagged by Nicolas Dandrimont on 2020-10-09 18:23:24 +0200) * Upstream changes: - Release swh.storage v0.16.0 - Updates to the intrinsic metadata migration script - Various improvements to the buffer storage - Update swh storage backfill to use common configuration keys -- Software Heritage autobuilder (on jenkins-debian1) Fri, 09 Oct 2020 16:33:11 +0000 swh-storage (0.15.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.15.3 - (tagged by Nicolas Dandrimont on 2020-09-24 20:14:39 +0200) * Upstream changes: - Release swh.storage v0.15.3 - hopefully fix the documentation build -- Software Heritage autobuilder (on jenkins-debian1) Thu, 24 Sep 2020 18:24:14 +0000 swh-storage (0.15.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.15.2 - (tagged by Nicolas Dandrimont on 2020-09-24 19:22:11 +0200) * Upstream changes: - Release swh.storage v0.15.2 - no change rebuild to clean up jenkins fsckup accumulating old files. -- Software Heritage autobuilder (on jenkins-debian1) Thu, 24 Sep 2020 17:28:22 +0000 swh-storage (0.15.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.15.1 - (tagged by Nicolas Dandrimont on 2020-09-24 18:34:54 +0200) * Upstream changes: - Release swh.storage v0.15.1 - Restore buffer proxy behavior with default arguments -- Software Heritage autobuilder (on jenkins-debian1) Thu, 24 Sep 2020 16:44:22 +0000 swh-storage (0.15.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.15.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-09-24 16:54:07 +0200) * Upstream changes: - v0.15.0 - Support different database flavors in the SQL scripts - Add the SQL commands used to set up the logical replication publication - Output a warning when the version of the database is different than expected - Improve code quality and doc in BufferedProxyStorage - Adapt cli declaration entrypoint to swh.core 0.3 - Add warning about skipped_content (sneaking into the 'content' topics) - graph- replayer: fix to prevent wrong warning - pre-commit: Add isort hook and reorder imports with isort - pytest_plugin: Change dbname to storage to avoid clash in tests - pytest_plugin: Use psql to load SQL files instead of connecting with psycopg2 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 24 Sep 2020 15:03:58 +0000 swh-storage (0.14.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.3 - (tagged by David Douard on 2020-09-17 16:58:59 +0200) * Upstream changes: - v0.14.3 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 17 Sep 2020 16:53:56 +0000 swh-storage (0.14.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.2 - (tagged by David Douard on 2020-09-11 15:31:22 +0200) * Upstream changes: - v0.14.2 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 11 Sep 2020 13:37:11 +0000 swh-storage (0.14.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-09-04 15:43:51 +0200) * Upstream changes: - v0.14.1 - algos.diff: Add missed revision_get conversion -- Software Heritage autobuilder (on jenkins-debian1) Fri, 04 Sep 2020 13:52:17 +0000 swh-storage (0.14.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-09-04 12:23:52 +0200) * Upstream changes: - v0.14.0 - Refactor revision_get storage API to return Revision objects - cassandra: Discard Content ctime field in content_get_partition -- Software Heritage autobuilder (on jenkins-debian1) Fri, 04 Sep 2020 10:59:54 +0000 swh-storage (0.13.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.13.3 - (tagged by Antoine R. Dumont (@ardumont) on 2020-09-01 14:34:57 +0200) * Upstream changes: - v0.13.3 - storage*: release_get(...) -> List[Optional[Release]] - Make StorageInterface a Protocol. - Add a validating storage proxy, to check ids before insertion. - Add a --check-config option for cli commands - Remove the deprecated config-path option from `swh storage rpc-serve` command - Add support for a new "check_config" config option in get_storage() - Check for db version mismatch in PgStorage.check_config() - Add a check_dbversion() method to the Db class - Fix pytest_plugin's database janitor: do not truncate the dbversion table - algos.snapshot: Add visits_and_snapshots_get_from_revision - storage/interface: Remove deprecated diff endpoints - storage_tests: Remove duplicated postgresql-specific tests. - Move postgresql-related files to swh/storage/postgresql/ -- Software Heritage autobuilder (on jenkins-debian1) Tue, 01 Sep 2020 12:40:29 +0000 swh-storage (0.13.2-1~swh2) unstable-swh; urgency=medium * Add mypy-extensions to build-dependencies -- Nicolas Dandrimont Fri, 21 Aug 2020 12:17:05 +0200 swh-storage (0.13.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.13.2 - (tagged by Valentin Lorentz on 2020-08-20 08:59:39 +0200) * Upstream changes: - v0.13.2 - * pg: Fix crash in snapshot_get when the snapshot does not exist. 
- * cassandra: fix signatures - * in_memory: rewrite as a backend for the cassandra storage - * remove endpoint snapshot_get_by_origin_visit. - * pg: rewrite converters to work with model objects -- Software Heritage autobuilder (on jenkins-debian1) Thu, 20 Aug 2020 07:18:50 +0000 swh-storage (0.13.1-1~swh3) unstable-swh; urgency=medium * Update dependencies -- Antoine R. Dumont (@ardumont) Fri, 07 Aug 2020 21:17:01 +0000 swh-storage (0.13.1-1~swh2) unstable-swh; urgency=medium * Update dependencies -- Antoine R. Dumont (@ardumont) Fri, 07 Aug 2020 21:02:01 +0000 swh-storage (0.13.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.13.1 - (tagged by Valentin Lorentz on 2020-08-07 18:14:32 +0200) * Upstream changes: - v0.13.1 - * Make snapshot_get_branches return a TypedDict containing SnapshotBranch objects. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 07 Aug 2020 16:23:01 +0000 swh-storage (0.13.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.13.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-08-07 12:38:47 +0200) * Upstream changes: - v0.13.0 - storage*: Rename and type content_get(List[Sha1]) -> List[Optional[Content]] - storage*: Rename content_get_data(Sha1) -> Optional[bytes] - Simplify as Content.ctime None is popped out of a to_dict call in recent model - cassandra.storage: Use next token for pagination instead of computing it -- Software Heritage autobuilder (on jenkins-debian1) Fri, 07 Aug 2020 10:49:28 +0000 swh-storage (0.12.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.12.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-08-06 08:50:17 +0200) * Upstream changes: - v0.12.0 - Type storage endpoints - Drop content_get_range endpoint in favor of content_get_partition -- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 Aug 2020 06:55:26 +0000 swh-storage (0.11.10-1~swh1) unstable-swh; urgency=medium * New upstream release 0.11.10 - (tagged by Antoine R. Dumont (@ardumont) on 2020-08-04 14:10:21 +0200) * Upstream changes: - v0.11.10 - tests: Improve coverage on directory_ls endpoints - storage*: Type content_find(...) -> List[Content] - storage*: Type {cnt,dir,rev,rel,snp}_get_random(...) -> Sha1Git -- Software Heritage autobuilder (on jenkins-debian1) Tue, 04 Aug 2020 12:15:21 +0000 swh-storage (0.11.9-1~swh1) unstable-swh; urgency=medium * New upstream release 0.11.9 - (tagged by Antoine R. Dumont (@ardumont) on 2020-08-03 11:55:10 +0200) * Upstream changes: - v0.11.9 - storage*: Drop origin-get- range in favor of origin-list - storage*: Do not allow unknown visit status in origin_visit*_get_latest - storage*: Add type annotation to origin_count - Reuse swh.core stream_results function -- Software Heritage autobuilder (on jenkins-debian1) Mon, 03 Aug 2020 10:02:56 +0000 swh-storage (0.11.8-1~swh1) unstable-swh; urgency=medium * New upstream release 0.11.8 - (tagged by Valentin Lorentz on 2020-07-31 14:57:09 +0200) * Upstream changes: - v0.11.8 - * test_replay: update for swh.journal 0.4.1. - * Add support for metadata-related object types to the backfiller and replayer. - * pg: Rewrite _origin_query to force the query planner to filter on URLs before filtering on visits. - * Make raw_extrinsic_metadata_get return PagedResult instead of Dict. - * Rename argument 'object_type' of raw_extrinsic_metadata_get to 'type'. 
-- Software Heritage autobuilder (on jenkins-debian1) Fri, 31 Jul 2020 13:17:40 +0000 swh-storage (0.11.6-1~swh1) unstable-swh; urgency=medium * New upstream release 0.11.6 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-30 16:20:48 +0200) * Upstream changes: - v0.11.6 - storage*: Adapt origin_list(...) -> PagedResult[Origin] - algos.snapshot: Open snapshot_id_get_from_revision - storage*: add origin_visit_status_get(...) -> PagedResult[OriginVisitStatus] - Add type annotations on get_storage. - buffer: Pass lists to backend functions, not iterables. - storage*: Simplify next-page- token computation - filter: Fix types passed to the proxied storage. - Fix upcoming type warning with swh.core > v0.1.2. - Make API endpoints take Lists instead of Iterables as arguments - storage*: use an enum to explicit the order in origin_visit_get - storage*: origin_visit_get(...) -> PagedResult[OriginVisit] - Write metadata + metadata authorities/fetchers to the journal. -- Software Heritage autobuilder (on jenkins-debian1) Thu, 30 Jul 2020 14:29:10 +0000 swh-storage (0.11.5-1~swh1) unstable-swh; urgency=medium * New upstream release 0.11.5 - (tagged by Valentin Lorentz on 2020-07-28 09:55:34 +0200) * Upstream changes: - v0.11.5 - in_memory: fix tie-breaking when two visits have the same date. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 28 Jul 2020 08:10:21 +0000 swh-storage (0.11.4-1~swh1) unstable-swh; urgency=medium * New upstream release 0.11.4 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-27 16:08:42 +0200) * Upstream changes: - v0.11.4 - Rename object_metadata to raw_extrinsic_metadata - metadata_{authority,fetcher}_add: Fix crash when the iterable argument is empty - storage*: origin_visit_get_by -> Optional[OriginVisit] - storage*: origin_visit_find_by_date -> Optional[OriginVisit] - storage*: type origin_visit_get_latest endpoint result - algos.origin: Simplify origin_get_latest_visit_status function -- Software Heritage autobuilder (on jenkins-debian1) Mon, 27 Jul 2020 14:16:18 +0000 swh-storage (0.11.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.11.3 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-27 08:01:03 +0200) * Upstream changes: - v0.11.3 - storage*: origin_get(Iterable[str]) -> Iterable[Optional[Origin]] - storage*.origin_visit_get_random: Read model objects -- Software Heritage autobuilder (on jenkins-debian1) Mon, 27 Jul 2020 06:08:55 +0000 swh-storage (0.11.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.11.2 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-23 12:09:51 +0200) * Upstream changes: - v0.11.2 - pgstorage: Drop unnecessary indirection from reading origin_visit - pytest-plugin: Make sample_data return data model objects - tests: Use only model objects for testing - Drop validate storage proxy -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 Jul 2020 10:18:15 +0000 swh-storage (0.11.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.11.1 - (tagged by Valentin Lorentz on 2020-07-20 13:01:20 +0200) * Upstream changes: - v0.11.1 - * Use model objects in tests - * Rename 'deposit' authority type to 'deposit_client'. 
-- Software Heritage autobuilder (on jenkins-debian1) Mon, 20 Jul 2020 11:14:39 +0000 swh-storage (0.11.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.11.0 - (tagged by Valentin Lorentz on 2020-07-20 11:01:10 +0200) * Upstream changes: - v0.11.0 - * Make metadata-related endpoints consistent with other endpoints by using Iterables of swh- model objects instead of a dict. - * Update tests to use model objects -- Software Heritage autobuilder (on jenkins-debian1) Mon, 20 Jul 2020 09:12:25 +0000 swh-storage (0.10.6-1~swh1) unstable-swh; urgency=medium * New upstream release 0.10.6 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-16 15:31:19 +0200) * Upstream changes: - v0.10.6 - pytest_plugin: Ensure fixture instantiates correctly -- Software Heritage autobuilder (on jenkins-debian1) Thu, 16 Jul 2020 13:36:34 +0000 swh-storage (0.10.5-1~swh1) unstable-swh; urgency=medium * New upstream release 0.10.5 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-16 14:24:50 +0200) * Upstream changes: - v0.10.5 - pytest_plugin: Do not expose the validate proxy storage - pytest-plugin: Expose a sample_data_model fixture - tests: Start using model objects and drop validate proxy when possible -- Software Heritage autobuilder (on jenkins-debian1) Thu, 16 Jul 2020 12:34:44 +0000 swh-storage (0.10.4-1~swh1) unstable-swh; urgency=medium * New upstream release 0.10.4 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-16 11:25:25 +0200) * Upstream changes: - v0.10.4 - pytest_plugin: Avoid fixture client to declare optional dependency - Allow cassandra binary path to be configured through env variable - 158: Make schema and migration converge so the migration works -- Software Heritage autobuilder (on jenkins-debian1) Thu, 16 Jul 2020 09:37:24 +0000 swh-storage (0.10.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.10.3 - (tagged by Antoine Lambert on 2020-07-10 16:26:27 +0200) * Upstream changes: - version 0.10.3 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 10 Jul 2020 14:40:28 +0000 swh-storage (0.10.2-1~swh2) unstable-swh; urgency=medium * Fix debian rules to avoid double pytest-plugin loading clash -- Antoine R. Dumont (@ardumont) Fri, 10 Jul 2020 09:21:14 +0200 swh-storage (0.10.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.10.2 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-10 08:30:37 +0200) * Upstream changes: - v0.10.2 - tests: Do no expose the pytest- plugin through setuptools entry - Convert ImmutableDict to dict before passing it to json.dumps - docs: Rework dia -> pdf pipeline for inkscape 1.0 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 10 Jul 2020 06:52:42 +0000 swh-storage (0.10.1-1~swh2) unstable-swh; urgency=medium * Update runtime dependencies -- Antoine R. Dumont (@ardumont) Wed, 08 Jul 2020 14:56:01 +0200 swh-storage (0.10.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.10.1 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-07-08 14:32:52 +0200) * Upstream changes: - v0.10.1 - extract-pytest-fixture Move shareable fixtures out of conftest into a dedicated pytest plugin - Migrate from vcversioner to setuptools-scm -- Software Heritage autobuilder (on jenkins-debian1) Wed, 08 Jul 2020 12:39:15 +0000 swh-storage (0.10.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.10.0 - (tagged by David Douard on 2020-07-08 09:20:49 +0200) * Upstream changes: - v0.10.0 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 08 Jul 2020 10:11:09 +0000 swh-storage (0.9.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.9.3 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-06 09:55:56 +0200) * Upstream changes: - v0.9.3 - storage: Send metrics from the origin_add endpoint -- Software Heritage autobuilder (on jenkins-debian1) Mon, 06 Jul 2020 08:06:13 +0000 swh-storage (0.9.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.9.2 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-03 18:48:39 +0200) * Upstream changes: - v0.9.2 - pg-storage: Add missing cur parameter passing -- Software Heritage autobuilder (on jenkins-debian1) Fri, 03 Jul 2020 16:54:13 +0000 swh-storage (0.9.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.9.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-03 16:50:45 +0200) * Upstream changes: - v0.9.1 - storage.db: Drop db.origin_visit_upsert behavior -- Software Heritage autobuilder (on jenkins-debian1) Fri, 03 Jul 2020 15:00:32 +0000 swh-storage (0.9.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.9.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-01 09:53:34 +0200) * Upstream changes: - v0.9.0 - storage*: Drop intermediary conversion step into OriginVisit - pg: use 'on conflict do nothing' strategy for duplicate metadata rows. - Make the code location of metadata endpoints consistent across backends. - Add content_metadata_{add,get}. - Add context columns to object_metadata table and object_metadata_{add,get}. - Generalize origin_metadata to allow support for other object types in the future. - Work around the segmentation faults caused by pytest-coverage + multiprocessing. -- Software Heritage autobuilder (on jenkins-debian1) Wed, 01 Jul 2020 08:02:08 +0000 swh-storage (0.8.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.8.1 - (tagged by David Douard on 2020-06-30 10:08:21 +0200) * Upstream changes: - v0.8.1 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 30 Jun 2020 08:36:45 +0000 swh-storage (0.8.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.8.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-06-29 09:33:12 +0200) * Upstream changes: - v0.8.0 - Iterate over paginated visits in batches to retrieve latest visit/snapshot - storage*: Open order parameter to origin-visit-get endpoint - tests/replayer/storage*: Drop obsolete origin visit fields - Relax checks on journal writes regarding origin-visit* - replayer: Fix isoformat datetime string for origin-visit - Deprecate the origin_add_one() endpoint - test_storage: Add missing tests on origin_visit_get method -- Software Heritage autobuilder (on jenkins-debian1) Mon, 29 Jun 2020 07:44:00 +0000 swh-storage (0.7.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.7.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-06-22 15:42:25 +0200) * Upstream changes: - v0.7.0 - test_origin: Rename appropriately tests - algos: Improve origin visit get latest visit status algorithm - test_snapshot: Do not use origin_visit_add returned result - algos.snapshot: Fix edge case when snapshot is not resolved - Ensure ids are correct in tests' storage_data - Fix tests' storage_data revisions - SQL: replace the hash(url) index by a unique btree(url) on the origin table - Make sure the pagination in swh_snapshot_get_by_id uses the proper indexes -- Software Heritage autobuilder (on jenkins-debian1) Mon, 22 Jun 2020 14:09:33 +0000 swh-storage (0.6.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-06-19 11:29:42 +0200) * Upstream changes: - v0.6.0 - Move deprecated endpoint snapshot_get_latest from api endpoint to algos - algos.origin: Open origin-get-latest-visit-status function - storage*: Allow origin-visit-get-latest to filter on type - test_origin: Align storage initialization within tests -- Software Heritage autobuilder (on jenkins-debian1) Fri, 19 Jun 2020 12:45:32 +0000 swh-storage (0.5.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-06-17 16:03:15 +0200) * Upstream changes: - v0.5.0 - test_storage: Fix flakiness in round to milliseconds test util method - storage*: Add origin- visit-status-get-latest endpoint - Fix/update the backfiller - validate: accept model objects as well as dicts on all add endpoints - cql: Fix blackified strings - storage: Add missing cur parameter - Fix db_to_author() converter to return None is all fields are None -- Software Heritage autobuilder (on jenkins-debian1) Wed, 17 Jun 2020 14:19:37 +0000 swh-storage (0.4.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.4.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-06-16 09:50:25 +0200) * Upstream changes: - v0.4.0 - ardumont/master storage*: Drop leftover code - storage*: Drop origin_visit_upsert endpoint - storage*: Remove origin-visit-update endpoint - replay: Replay origin-visit and origin-visit-status - in_memory: Make origin- visit-status-add respect "on conflict ignore" policy - test_storage: Add journal behavior coverage for origin-visit-*add - Start migrating the validate proxy toward using BaseModel objects - storage*: Do not write twice origin-visit-status in journal -- Software Heritage autobuilder (on jenkins-debian1) Tue, 16 Jun 2020 07:58:23 +0000 swh-storage (0.3.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-06-12 09:08:23 +0200) * Upstream changes: - v0.3.0 - origin-visit-add storage*: Align origin-visit-add to take iterable of OriginVisit objects -- Software Heritage autobuilder (on jenkins-debian1) Fri, 12 Jun 2020 07:22:03 +0000 swh-storage (0.2.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-06-10 11:51:30 +0200) * Upstream changes: - v0.2.0 - origin-visit-upsert: Write visit status objects to the journal - origin-visit-update: Write visit status objects to the journal - origin-visit-add: Write visit status to the journal - Add pagination to origin_metadata_get. - Deduplicate origin-metadata when they have the same authority + discovery_date + fetcher. 
- Open `origin_visit_status_add` endpoint to add origin visit statuses - Add a replayer test for anonymized journal topics - Small refactoring of the InMemoryStorage to make it more consistent -- Software Heritage autobuilder (on jenkins-debian1) Wed, 10 Jun 2020 10:02:45 +0000 swh-storage (0.1.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.1 - (tagged by Nicolas Dandrimont on 2020-06-04 16:49:22 +0200) * Upstream changes: - Release swh.storage v0.1.1 - Work around tests hanging during Debian build -- Software Heritage autobuilder (on jenkins-debian1) Thu, 04 Jun 2020 14:56:54 +0000 swh-storage (0.1.0-2~swh1) unstable-swh; urgency=medium * Update dependencies. -- David Douard Thu, 04 Jun 2020 13:40:52 +0200 swh-storage (0.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.0 - (tagged by David Douard on 2020-06-04 12:08:46 +0200) * Upstream changes: - v0.1.0 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 04 Jun 2020 10:28:43 +0000 swh-storage (0.0.193-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.193 - (tagged by Antoine R. Dumont (@ardumont) on 2020-05-28 14:28:54 +0200) * Upstream changes: - v0.0.193 - pg: Write origin visit updates & status, read from origin_visit_status - Make content.blake2s256 not null. - Remove unused SQL functions. - README: Update necessary dependencies for test purposes - Add a pre-commit hook to check there are version bumps in sql/upgrades/*.sql - Add missing dbversion bump in 150.sql. - Add artifact metadata to the extrinsic metadata storage specification. - Add not null constraints to metadata_authority/origin_metadata - Realign schema with latest 149 migration script -- Software Heritage autobuilder (on jenkins-debian1) Thu, 28 May 2020 12:37:58 +0000 swh-storage (0.0.192-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.192 - (tagged by Valentin Lorentz on 2020-05-19 18:42:00 +0200) * Upstream changes: - v0.0.192 - * origin_metadata_add: Reject non-bytes types for 'metadata'. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 19 May 2020 16:54:00 +0000 swh-storage (0.0.191-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.191 - (tagged by Valentin Lorentz on 2020-05-19 13:43:35 +0200) * Upstream changes: - v0.0.191 - * Implement the new extrinsic metadata specification/vocabulary. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 19 May 2020 11:52:00 +0000 swh-storage (0.0.190-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.190 - (tagged by Antoine R. Dumont (@ardumont) on 2020-05-18 14:10:39 +0200) * Upstream changes: - v0.0.190 - storage: metadata_provider: Ensure idempotency when creating provider - journal: add a skipped_content topic dedicated to SkippedContent objects - Add missing return annotations on JournalWriter methods - Improve a bit the exception message of JournalWriter.content_update - Refactor the JournalWriter class to normalize its methods - tests: fix test_replay; do only use aware datetime objects - test_kafka_writer: Add missing object type skipped_content -- Software Heritage autobuilder (on jenkins-debian1) Mon, 18 May 2020 12:18:09 +0000 swh-storage (0.0.189-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.189 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-04-30 14:50:54 +0200) * Upstream changes: - v0.0.189 - pg: Write both origin visit updates & status, read from origin_visit - pg-storage: Add new created state - setup.py: add documentation link - metadata spec: Fix title hierarchy - tests: Use aware datetimes instead of naive ones. - cassandra: Adapt internal implementations to use origin visit status - in_memory: Adapt internal implementations to use origin visit status -- Software Heritage autobuilder (on jenkins-debian1) Thu, 30 Apr 2020 12:58:57 +0000 swh-storage (0.0.188-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.188 - (tagged by David Douard on 2020-04-28 13:44:20 +0200) * Upstream changes: - v0.0.188 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 28 Apr 2020 11:52:08 +0000 swh-storage (0.0.187-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.187 - (tagged by Antoine R. Dumont (@ardumont) on 2020-04-14 18:13:08 +0200) * Upstream changes: - v0.0.187 - storage.interface: Actually define the remote flush operation -- Software Heritage autobuilder (on jenkins-debian1) Tue, 14 Apr 2020 16:23:41 +0000 swh-storage (0.0.186-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.186 - (tagged by Nicolas Dandrimont on 2020-04-14 17:09:22 +0200) * Upstream changes: - Release swh.storage v0.0.186 - Drop backwards-compatibility code with swh.journal < 0.0.30 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 14 Apr 2020 15:20:57 +0000 swh-storage (0.0.185-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.185 - (tagged by Antoine R. Dumont (@ardumont) on 2020-04-14 14:15:32 +0200) * Upstream changes: - v0.0.185 - storage.filter: Remove internal state - test: update storage tests to (future) swh.journal 0.0.30 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 14 Apr 2020 12:22:06 +0000 swh-storage (0.0.184-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.184 - (tagged by Antoine R. Dumont (@ardumont) on 2020-04-10 16:07:32 +0200) * Upstream changes: - v0.0.184 - storage*: Add flush endpoints to storage implems (backend, proxy) - test_retry: Add missing skipped_content_add tests -- Software Heritage autobuilder (on jenkins-debian1) Fri, 10 Apr 2020 14:14:20 +0000 swh-storage (0.0.183-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.183 - (tagged by Antoine R. Dumont (@ardumont) on 2020-04-09 12:35:53 +0200) * Upstream changes: - v0.0.183 - proxy storage: Add a clear_buffers endpoint - buffer proxy storage: Filter out duplicate objects prior to storage write - storage: Prevent erroneous HashCollisions by using the same ctime for all rows. - Enable black - origin_visit_update: ensure it raises a StorageArgumentException - Adapt cassandra backend to validating model types - tests: many refactoring improvements - tests: Shut down cassandra connection before closing the fixture down - Add more type annotations -- Software Heritage autobuilder (on jenkins-debian1) Thu, 09 Apr 2020 10:46:29 +0000 swh-storage (0.0.182-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.182 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-27 07:02:13 +0100) * Upstream changes: - v0.0.182 - storage*: Update origin_visit_update to make status parameter mandatory - test: Adapt origin validation test according to latest model changes - Respec discovery_date as a Python datetime instead of an ISO string. - origin_visit_add: Add missing db/cur argument to call to origin_get. 
-- Software Heritage autobuilder (on jenkins-debian1) Fri, 27 Mar 2020 06:13:17 +0000 swh-storage (0.0.181-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.181 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-25 09:50:49 +0100) * Upstream changes: - v0.0.181 - storage*: Hex encode content hashes in HashCollision exception - Add format of discovery_date in the metadata specification. - Store the value of token(partition_key) in skipped_content_by_* table, instead of three hashes. - Store the value of token(partition_key) in content_by_* table, instead of three hashes. -- Software Heritage autobuilder (on jenkins-debian1) Wed, 25 Mar 2020 09:03:43 +0000 swh-storage (0.0.180-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.180 - (tagged by Nicolas Dandrimont on 2020-03-18 18:24:41 +0100) * Upstream changes: - Release swh.storage v0.0.180 - Stop counting origin additions multiple times in statsd -- Software Heritage autobuilder (on jenkins-debian1) Wed, 18 Mar 2020 17:45:36 +0000 swh-storage (0.0.179-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.179 - (tagged by Nicolas Dandrimont on 2020-03-18 16:05:13 +0100) * Upstream changes: - Release swh.storage v0.0.179. - fix requirements-swh.txt to use proper version restriction - reduce the transaction load for content writes and reads -- Software Heritage autobuilder (on jenkins-debian1) Wed, 18 Mar 2020 15:50:50 +0000 swh-storage (0.0.178-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.178 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-16 12:51:28 +0100) * Upstream changes: - v0.0.178 - origin_visit_add: Adapt endpoint signature to return OriginVisit - origin_visit_upsert: Use OriginVisit object as input - storage/writer: refactor JournalWriter.content_add to send model objects -- Software Heritage autobuilder (on jenkins-debian1) Mon, 16 Mar 2020 11:59:18 +0000 swh-storage (0.0.177-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.177 - (tagged by Antoine R. Dumont (@ardumont) on 2020-03-10 11:37:33 +0100) * Upstream changes: - v0.0.177 - storage: Identify and provide the collision hashes in exception - Guarantee the order of results for revision_get and release_get - tests: Improve test speed - sql: do not attempt to create the plpgsql lang if already exists - Update requirement on swh.core for RPCClient method overrides -- Software Heritage autobuilder (on jenkins-debian1) Tue, 10 Mar 2020 10:48:11 +0000 swh-storage (0.0.176-1~swh2) unstable-swh; urgency=medium * Update build dependencies -- Antoine R. Dumont (@ardumont) Mon, 02 Mar 2020 14:36:00 +0100 swh-storage (0.0.176-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.176 - (tagged by Valentin Lorentz on 2020-02-28 14:44:10 +0100) * Upstream changes: - v0.0.176 - * Accept cassandra-driver >= 3.22. - * Make the RPC client and objstorage helper fetch Content.data from lazy - contents. - * Move ctime out of the validation proxy. 
-- Software Heritage autobuilder (on jenkins-debian1) Fri, 28 Feb 2020 15:21:27 +0000 swh-storage (0.0.175-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.175 - (tagged by Antoine Lambert on 2020-02-20 13:51:40 +0100) * Upstream changes: - version 0.0.175 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 20 Feb 2020 13:18:34 +0000 swh-storage (0.0.174-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.174 - (tagged by Valentin Lorentz on 2020-02-19 14:18:59 +0100) * Upstream changes: - v0.0.174 - * Fix inconsistent behavior of skipped_content_missing across backends. - * Fix FilteringProxy to not drop skipped-contents with a missing sha1_git. - * Make storage proxies use swh-model objects instead of dicts. - * Add support for (de)serializing swh-model in RPC calls. -- Software Heritage autobuilder (on jenkins-debian1) Wed, 19 Feb 2020 15:00:32 +0000 swh-storage (0.0.172-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.172 - (tagged by Valentin Lorentz on 2020-02-12 14:00:04 +0100) * Upstream changes: - v0.0.172 - * Unify exception raised by invalid input to API endpoints. - * Add a validation proxy for _add() methods. This proxy is *required* - in front of all backends whose _add() methods may be called or they'll - crash at runtime. - * Fix RecursionError when storage proxies are deepcopied or unpickled. - * storages: Refactor objstorage operations with a dedicated collaborator - * storages: Refactor journal operations with a dedicated writer collab -- Software Heritage autobuilder (on jenkins-debian1) Wed, 12 Feb 2020 13:13:47 +0000 swh-storage (0.0.171-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.171 - (tagged by Valentin Lorentz on 2020-02-06 14:46:05 +0100) * Upstream changes: - v0.0.171 - * Split 'content_add' method into 'content_add' and 'skipped_content_add'. - * Increase Cassandra requests timeout to 1 second. -- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 Feb 2020 14:07:37 +0000 swh-storage (0.0.170-1~swh3) unstable-swh; urgency=medium * Update build dependencies -- Antoine R. Dumont (@ardumont) Mon, 03 Feb 2020 17:30:38 +0100 swh-storage (0.0.170-1~swh2) unstable-swh; urgency=medium * Update build dependencies -- Antoine R. Dumont (@ardumont) Mon, 03 Feb 2020 16:00:39 +0100 swh-storage (0.0.170-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.170 - (tagged by Antoine R. Dumont (@ardumont) on 2020-02-03 14:11:53 +0100) * Upstream changes: - v0.0.170 - swh.storage.cassandra: Add Cassandra backend implementation -- Software Heritage autobuilder (on jenkins-debian1) Mon, 03 Feb 2020 13:23:48 +0000 swh-storage (0.0.169-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.169 - (tagged by Antoine R. Dumont (@ardumont) on 2020-01-30 13:40:00 +0100) * Upstream changes: - v0.0.169 - retry: Add retry behavior on pipeline storage with flushing failure -- Software Heritage autobuilder (on jenkins-debian1) Thu, 30 Jan 2020 13:26:23 +0000 swh-storage (0.0.168-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.168 - (tagged by Valentin Lorentz on 2020-01-30 11:19:31 +0100) * Upstream changes: - v0.0.168 - * Implement content_update for the in-mem storage. - * Remove cur/db arguments from the in- mem storage. - * Move Storage documentation and endpoint paths to a new StorageInterface class - * Rename in_memory.Storage to in_memory.InMemoryStorage. 
- * CONTRIBUTORS: add Daniele Serafini -- Software Heritage autobuilder (on jenkins-debian1) Thu, 30 Jan 2020 10:25:30 +0000 swh-storage (0.0.167-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.167 - (tagged by Antoine R. Dumont (@ardumont) on 2020-01-24 14:55:57 +0100) * Upstream changes: - v0.0.167 - pgstorage: Empty temp tables instead of dropping them -- Software Heritage autobuilder (on jenkins-debian1) Fri, 24 Jan 2020 14:01:57 +0000 swh-storage (0.0.166-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.166 - (tagged by Antoine R. Dumont (@ardumont) on 2020-01-24 09:51:52 +0100) * Upstream changes: - v0.0.166 - storage: Add endpoint to get missing content (by sha1_git) and missing snapshot - Remove redundant config checks in load_and_check_config - Remove 'id' and 'object_id' from the output of object_find_by_sha1_git - Make origin_visit_get_random return None instead of {} if there are no results - docs: Fix sphinx warnings -- Software Heritage autobuilder (on jenkins-debian1) Fri, 24 Jan 2020 09:00:12 +0000 swh-storage (0.0.165-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.165 - (tagged by Antoine R. Dumont (@ardumont) on 2020-01-17 14:04:53 +0100) * Upstream changes: - v0.0.165 - storage.retry: Fix objects loading when using generator parameters -- Software Heritage autobuilder (on jenkins-debian1) Fri, 17 Jan 2020 13:09:39 +0000 swh-storage (0.0.164-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.164 - (tagged by Antoine Lambert on 2020-01-16 17:54:40 +0100) * Upstream changes: - version 0.0.164 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 16 Jan 2020 17:05:02 +0000 swh-storage (0.0.163-1~swh2) unstable-swh; urgency=medium * Fix test dependency -- Antoine R. Dumont (@ardumont) Tue, 14 Jan 2020 17:26:08 +0100 swh-storage (0.0.163-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.163 - (tagged by Antoine R. Dumont (@ardumont) on 2020-01-14 17:12:03 +0100) * Upstream changes: - v0.0.163 - retry: Improve proxy storage for add endpoints - in_memory: Make directory_get_random return None when storage empty - storage: Change content_get_metadata api to return Dict[bytes, List[Dict]] - storage: Add content_get_partition endpoint to replace content_get_range - storage: Add endpoint origin_list to replace origin_get_range -- Software Heritage autobuilder (on jenkins-debian1) Tue, 14 Jan 2020 16:17:45 +0000 swh-storage (0.0.162-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.162 - (tagged by Valentin Lorentz on 2019-12-16 14:37:44 +0100) * Upstream changes: - v0.0.162 - Add {content,directory,revision,release,snapshot}_get_random. -- Software Heritage autobuilder (on jenkins-debian1) Mon, 16 Dec 2019 13:41:39 +0000 swh-storage (0.0.161-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.161 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-10 15:03:28 +0100) * Upstream changes: - v0.0.161 - storage: Add endpoint to randomly pick an origin -- Software Heritage autobuilder (on jenkins-debian1) Tue, 10 Dec 2019 14:08:15 +0000 swh-storage (0.0.160-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.160 - (tagged by Antoine R. 
Dumont (@ardumont) on 2019-12-06 11:15:48 +0100) * Upstream changes: - v0.0.160 - storage.buffer: Buffer release objects as well - storage.tests: Unify tests sample data - Implement origin lookup by sha1 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 06 Dec 2019 10:23:44 +0000 swh-storage (0.0.159-1~swh2) unstable-swh; urgency=medium * Force fast hypothesis profile when running tests -- Antoine R. Dumont (@ardumont) Tue, 26 Nov 2019 17:08:16 +0100 swh-storage (0.0.159-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.159 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-22 11:05:41 +0100) * Upstream changes: - v0.0.159 - Add 'pipeline' storage "class" for more readable configurations. - tests: Improve tests environments configuration - Fix a few typos reported by codespell - Add a pre-commit-hooks.yaml config file - Remove utils/(dump|fix)_revisions scripts -- Software Heritage autobuilder (on jenkins-debian1) Fri, 22 Nov 2019 10:10:31 +0000 swh-storage (0.0.158-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.158 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-14 13:33:00 +0100) * Upstream changes: - v0.0.158 - Drop schemata module (migrated back to swh-lister) -- Software Heritage autobuilder (on jenkins-debian1) Thu, 14 Nov 2019 12:37:18 +0000 swh-storage (0.0.157-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.157 - (tagged by Nicolas Dandrimont on 2019-11-13 13:22:39 +0100) * Upstream changes: - Release swh.storage 0.0.157 - schemata.distribution: Fix bogus NotImplementedError on Area.index_uris -- Software Heritage autobuilder (on jenkins-debian1) Wed, 13 Nov 2019 12:27:07 +0000 swh-storage (0.0.156-1~swh2) unstable-swh; urgency=medium * Add version constraint on psycopg2 -- Nicolas Dandrimont Wed, 30 Oct 2019 18:21:34 +0100 swh-storage (0.0.156-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.156 - (tagged by Valentin Lorentz on 2019-10-30 15:12:10 +0100) * Upstream changes: - v0.0.156 - * Stop supporting origin ids in API (except in origin_get_range). - * Make visit['origin'] a string everywhere (instead of a dict). -- Software Heritage autobuilder (on jenkins-debian1) Wed, 30 Oct 2019 14:29:28 +0000 swh-storage (0.0.155-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.155 - (tagged by David Douard on 2019-10-30 12:14:14 +0100) * Upstream changes: - v0.0.155 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 30 Oct 2019 11:18:37 +0000 swh-storage (0.0.154-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.154 - (tagged by Antoine R. Dumont (@ardumont) on 2019-10-17 13:47:57 +0200) * Upstream changes: - v0.0.154 - Fix tests in debian build -- Software Heritage autobuilder (on jenkins-debian1) Thu, 17 Oct 2019 11:52:46 +0000 swh-storage (0.0.153-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.153 - (tagged by Antoine R. Dumont (@ardumont) on 2019-10-17 13:21:00 +0200) * Upstream changes: - v0.0.153 - Deploy new test fixture -- Software Heritage autobuilder (on jenkins-debian1) Thu, 17 Oct 2019 11:26:12 +0000 swh-storage (0.0.152-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.152 - (tagged by Antoine R. 
Dumont (@ardumont) on 2019-10-08 16:55:43 +0200) * Upstream changes: - v0.0.152 - swh.storage.buffer: Add buffering proxy storage implementation - swh.storage.filter: Add filtering storage implementation - swh.storage.tests: Improve db transaction handling - swh.storage.tests: Add more tests - swh.storage.storage: introduce a db() context manager -- Software Heritage autobuilder (on jenkins-debian1) Tue, 08 Oct 2019 15:03:16 +0000 swh-storage (0.0.151-1~swh2) unstable-swh; urgency=medium * Add missing build-dependency on python3-swh.journal -- Nicolas Dandrimont Tue, 01 Oct 2019 18:28:19 +0200 swh-storage (0.0.151-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.151 - (tagged by Stefano Zacchiroli on 2019-10-01 10:04:36 +0200) * Upstream changes: - v0.0.151 - * tox: anticipate mypy run to just after flake8 - * mypy.ini: be less flaky w.r.t. the packages installed in tox - * storage.py: ignore typing of optional get_journal_writer import - * mypy: ignore swh.journal to work-around dependency loop - * init.py: switch to documented way of extending path - * typing: minimal changes to make a no- op mypy run pass - * Write objects to the journal only if they don't exist yet. - * Use origin URLs for skipped_content['origin'] instead of origin ids. - * Properly mock get_journal_writer for the remote-pg-storage tests. - * journal_writer: use journal writer from swh.journal - * fix typos in docstrings and sample paths - * storage.origin_visit_add: Remove deprecated 'ts' parameter - * click "required" param wants bool, not int -- Software Heritage autobuilder (on jenkins-debian1) Tue, 01 Oct 2019 08:09:53 +0000 swh-storage (0.0.150-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.150 - (tagged by Antoine R. Dumont (@ardumont) on 2019-09-04 16:09:59 +0200) * Upstream changes: - v0.0.150 - tests/test_storage: Remove failing assertion after swh-model update - tests/test_storage: Fix tests execution with psycopg2 < 2.8 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 04 Sep 2019 14:16:09 +0000 swh-storage (0.0.149-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.149 - (tagged by Antoine R. Dumont (@ardumont) on 2019-09-03 14:00:57 +0200) * Upstream changes: - v0.0.149 - Add support for origin_url in origin_metadata_* - Make origin_add/origin_visit_update validate their input - Make snapshot_add validate its input - Make revision_add and release_add validate their input - Make directory_add validate its input - Make content_add validate its input using swh-model -- Software Heritage autobuilder (on jenkins-debian1) Tue, 03 Sep 2019 12:27:51 +0000 swh-storage (0.0.148-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.148 - (tagged by Valentin Lorentz on 2019-08-23 10:33:02 +0200) * Upstream changes: - v0.0.148 - Tests improvements: - * Remove 'next_branch' from test input data. - * Fix off-by-one error when using origin_visit_upsert on with an unknown visit id. - * Use explicit arguments for origin_visit_add. - * Remove test_content_missing__marked_missing, it makes no sense. - Drop person ids: - * Stop leaking person ids. - * Remove person_get endpoint. - Logging fixes: - * Enforce log level for the werkzeug logger. - * Eliminate warnings about %TYPE. 
- * api: use RPCServerApp and RPCClient instead of deprecated classes - Other: - * Add support for skipped content in in- memory storage -- Software Heritage autobuilder (on jenkins-debian1) Fri, 23 Aug 2019 08:48:21 +0000 swh-storage (0.0.147-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.147 - (tagged by Valentin Lorentz on 2019-07-18 12:11:37 +0200) * Upstream changes: - Make origin_get ignore the `type` argument -- Software Heritage autobuilder (on jenkins-debian1) Thu, 18 Jul 2019 10:16:16 +0000 swh-storage (0.0.146-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.146 - (tagged by Valentin Lorentz on 2019-07-18 10:46:21 +0200) * Upstream changes: - Progress toward getting rid of origin ids - * Less dependency on origin ids in the in-mem storage - * add the SWH_STORAGE_IN_MEMORY_ENABLE_ORIGIN_IDS env var - * Remove legacy behavior of snapshot_add -- Software Heritage autobuilder (on jenkins-debian1) Thu, 18 Jul 2019 08:52:09 +0000 swh-storage (0.0.145-1~swh3) unstable-swh; urgency=medium * Properly rebuild for unstable-swh -- Nicolas Dandrimont Thu, 11 Jul 2019 14:03:30 +0200 swh-storage (0.0.145-1~swh2) buster-swh; urgency=medium * Remove useless swh.scheduler dependency -- Nicolas Dandrimont Thu, 11 Jul 2019 13:53:45 +0200 swh-storage (0.0.145-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.145 - (tagged by Valentin Lorentz on 2019-07-02 12:00:53 +0200) * Upstream changes: - v0.0.145 - Add an 'origin_visit_find_by_date' endpoint. - Add support for origin urls in all endpoints -- Software Heritage autobuilder (on jenkins-debian1) Tue, 02 Jul 2019 10:19:19 +0000 swh-storage (0.0.143-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.143 - (tagged by Valentin Lorentz on 2019-06-05 13:18:14 +0200) * Upstream changes: - Add test for snapshot/release counters. -- Software Heritage autobuilder (on jenkins-debian1) Mon, 01 Jul 2019 12:38:40 +0000 swh-storage (0.0.142-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.142 - (tagged by Valentin Lorentz on 2019-06-11 15:24:49 +0200) * Upstream changes: - Mark network tests, so they can be disabled. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 11 Jun 2019 13:44:19 +0000 swh-storage (0.0.141-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.141 - (tagged by Valentin Lorentz on 2019-06-06 17:05:03 +0200) * Upstream changes: - Add support for using URL instead of ID in snapshot_get_latest. 
-- Software Heritage autobuilder (on jenkins-debian1) Tue, 11 Jun 2019 10:36:32 +0000 swh-storage (0.0.140-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.140 - (tagged by mihir(faux__) on 2019-03-24 21:47:31 +0530) * Upstream changes: - Changes the output of content_find method to a list in case of hash collisions and makes the sql query on python side and added test duplicate input, colliding sha256 and colliding blake2s256 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 16 May 2019 12:09:04 +0000 swh-storage (0.0.139-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.139 - (tagged by Nicolas Dandrimont on 2019-04-18 17:57:57 +0200) * Upstream changes: - Release swh.storage v0.0.139 - Backwards- compatibility improvements for snapshot_add - Better transactionality in revision_add/release_add - Fix backwards metric names - Handle shallow histories properly -- Software Heritage autobuilder (on jenkins-debian1) Thu, 18 Apr 2019 16:08:28 +0000 swh-storage (0.0.138-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.138 - (tagged by Valentin Lorentz on 2019-04-09 16:40:49 +0200) * Upstream changes: - Use the db_transaction decorator on all _add() methods. - So they gracefully release the connection on error instead - of relying on reference-counting to call the Db's `__del__` - (which does not happen in Hypothesis tests) because a ref - to it is kept via the traceback object. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 09 Apr 2019 16:50:48 +0000 swh-storage (0.0.137-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.137 - (tagged by Valentin Lorentz on 2019-04-08 15:40:24 +0200) * Upstream changes: - Make test_origin_get_range run faster. -- Software Heritage autobuilder (on jenkins-debian1) Mon, 08 Apr 2019 13:56:16 +0000 swh-storage (0.0.135-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.135 - (tagged by Valentin Lorentz on 2019-04-04 20:42:32 +0200) * Upstream changes: - Make content_add_metadata require a ctime argument. - This makes Python set the ctime instead of pgsql. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 05 Apr 2019 14:43:28 +0000 swh-storage (0.0.134-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.134 - (tagged by Valentin Lorentz on 2019-04-03 13:38:58 +0200) * Upstream changes: - Don't leak origin ids to the journal. -- Software Heritage autobuilder (on jenkins-debian1) Thu, 04 Apr 2019 10:16:09 +0000 swh-storage (0.0.132-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.132 - (tagged by Valentin Lorentz on 2019-04-01 11:50:30 +0200) * Upstream changes: - Use sha1 instead of bigint as FK from origin_visit to snapshot (part 1: add new column) -- Software Heritage autobuilder (on jenkins-debian1) Mon, 01 Apr 2019 13:30:48 +0000 swh-storage (0.0.131-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.131 - (tagged by Nicolas Dandrimont on 2019-03-28 17:24:44 +0100) * Upstream changes: - Release swh.storage v0.0.131 - Add statsd metrics to storage RPC backend - Clean up snapshot_add/origin_visit_update - Uniformize RPC backend to use POSTs everywhere -- Software Heritage autobuilder (on jenkins-debian1) Thu, 28 Mar 2019 16:34:07 +0000 swh-storage (0.0.130-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.130 - (tagged by Valentin Lorentz on 2019-02-26 10:50:44 +0100) * Upstream changes: - Add an helper function to list all origins in the storage. 
-- Software Heritage autobuilder (on jenkins-debian1) Wed, 13 Mar 2019 14:01:04 +0000 swh-storage (0.0.129-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.129 - (tagged by Valentin Lorentz on 2019-02-27 10:42:29 +0100) * Upstream changes: - Double the timeout of revision_get. - Metadata indexers often hit the limit. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 01 Mar 2019 10:11:28 +0000 swh-storage (0.0.128-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.128 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-21 14:59:22 +0100) * Upstream changes: - v0.0.128 - api.server: Fix wrong exception type - storage.cli: Fix cli entry point name to the expected name (setup.py) -- Software Heritage autobuilder (on jenkins-debian1) Thu, 21 Feb 2019 14:07:23 +0000 swh-storage (0.0.127-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.127 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-21 13:34:19 +0100) * Upstream changes: - v0.0.127 - api.wsgi: Open wsgi entrypoint and check config at startup time - api.server: Make the api server load and check its configuration - swh.storage.cli: Migrate the api server startup in swh.storage.cli -- Software Heritage autobuilder (on jenkins-debian1) Thu, 21 Feb 2019 12:59:48 +0000 swh-storage (0.0.126-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.126 - (tagged by Valentin Lorentz on 2019-02-21 10:18:26 +0100) * Upstream changes: - Double the timeout of snapshot_get_latest. - Metadata indexers often hit the limit. -- Software Heritage autobuilder (on jenkins-debian1) Thu, 21 Feb 2019 11:24:52 +0000 swh-storage (0.0.125-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.125 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-14 10:13:31 +0100) * Upstream changes: - v0.0.125 - api/server: Do not read configuration at each request -- Software Heritage autobuilder (on jenkins-debian1) Thu, 14 Feb 2019 16:57:01 +0000 swh-storage (0.0.124-1~swh3) unstable-swh; urgency=low * New upstream release, fixing the distribution this time -- Antoine R. Dumont (@ardumont) Thu, 14 Feb 2019 17:51:29 +0100 swh-storage (0.0.124-1~swh2) unstable; urgency=medium * New upstream release for dependency fix reasons -- Antoine R. Dumont (@ardumont) Thu, 14 Feb 2019 09:27:55 +0100 swh-storage (0.0.124-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.124 - (tagged by Antoine Lambert on 2019-02-12 14:40:53 +0100) * Upstream changes: - version 0.0.124 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 12 Feb 2019 13:46:08 +0000 swh-storage (0.0.123-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.123 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-08 15:06:49 +0100) * Upstream changes: - v0.0.123 - Make Storage.origin_get support a list of origins, like other - Storage.*_get methods. - Stop using _to_bytes functions. 
- Use the BaseDb (and friends) from swh-core -- Software Heritage autobuilder (on jenkins-debian1) Fri, 08 Feb 2019 14:14:18 +0000 swh-storage (0.0.122-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.122 - (tagged by Antoine Lambert on 2019-01-28 11:57:27 +0100) * Upstream changes: - version 0.0.122 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 28 Jan 2019 11:02:45 +0000 swh-storage (0.0.121-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.121 - (tagged by Antoine Lambert on 2019-01-28 11:31:48 +0100) * Upstream changes: - version 0.0.121 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 28 Jan 2019 10:36:40 +0000 swh-storage (0.0.120-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.120 - (tagged by Antoine Lambert on 2019-01-17 12:04:27 +0100) * Upstream changes: - version 0.0.120 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 17 Jan 2019 11:12:47 +0000 swh-storage (0.0.119-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.119 - (tagged by Antoine R. Dumont (@ardumont) on 2019-01-11 11:57:13 +0100) * Upstream changes: - v0.0.119 - listener: Notify Kafka when an origin visit is updated -- Software Heritage autobuilder (on jenkins-debian1) Fri, 11 Jan 2019 11:02:07 +0000 swh-storage (0.0.118-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.118 - (tagged by Antoine Lambert on 2019-01-09 16:59:15 +0100) * Upstream changes: - version 0.0.118 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 09 Jan 2019 18:51:34 +0000 swh-storage (0.0.117-1~swh1) unstable-swh; urgency=medium * v0.0.117 * listener: Adapt decoding behavior depending on the object type -- Antoine R. Dumont (@ardumont) Thu, 20 Dec 2018 14:48:44 +0100 swh-storage (0.0.116-1~swh1) unstable-swh; urgency=medium * v0.0.116 * Update requirements to latest swh.core -- Antoine R. Dumont (@ardumont) Fri, 14 Dec 2018 15:57:04 +0100 swh-storage (0.0.115-1~swh1) unstable-swh; urgency=medium * version 0.0.115 -- Antoine Lambert Fri, 14 Dec 2018 15:47:52 +0100 swh-storage (0.0.114-1~swh1) unstable-swh; urgency=medium * version 0.0.114 -- Antoine Lambert Wed, 05 Dec 2018 10:59:49 +0100 swh-storage (0.0.113-1~swh1) unstable-swh; urgency=medium * v0.0.113 * in-memory storage: Add recursive argument to directory_ls endpoint -- Antoine R. Dumont (@ardumont) Fri, 30 Nov 2018 11:56:44 +0100 swh-storage (0.0.112-1~swh1) unstable-swh; urgency=medium * v0.0.112 * in-memory storage: Align with existing storage * docstring: Improvements and adapt according to api * doc: update index to match new swh-doc format * Increase test coverage for stat_counters + fix its bugs. -- Antoine R. Dumont (@ardumont) Fri, 30 Nov 2018 10:28:02 +0100 swh-storage (0.0.111-1~swh1) unstable-swh; urgency=medium * v0.0.111 * Move generative tests in their own module * Open in-memory storage implementation -- Antoine R. Dumont (@ardumont) Wed, 21 Nov 2018 08:55:14 +0100 swh-storage (0.0.110-1~swh1) unstable-swh; urgency=medium * v0.0.110 * storage: Open content_get_range endpoint * tests: Start using hypothesis for tests generation * Improvements: Remove SQLisms from the tests and API * docs: Document metadata providers -- Antoine R. 
Dumont (@ardumont) Fri, 16 Nov 2018 11:53:14 +0100 swh-storage (0.0.109-1~swh1) unstable-swh; urgency=medium * version 0.0.109 -- Antoine Lambert Mon, 12 Nov 2018 14:11:09 +0100 swh-storage (0.0.108-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.108 * Add a function to get a full snapshot from the paginated view -- Nicolas Dandrimont Thu, 18 Oct 2018 18:32:10 +0200 swh-storage (0.0.107-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.107 * Enable pagination of snapshot branches * Drop occurrence-related tables * Drop entity-related tables -- Nicolas Dandrimont Wed, 17 Oct 2018 15:06:07 +0200 swh-storage (0.0.106-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.106 * Fix origin_visit_get_latest_snapshot logic * Improve directory iterator * Drop backwards compatibility between snapshots and occurrences * Drop the occurrence table -- Nicolas Dandrimont Mon, 08 Oct 2018 17:03:54 +0200 swh-storage (0.0.105-1~swh1) unstable-swh; urgency=medium * v0.0.105 * Increase directory_ls endpoint to 20 seconds * Add snapshot to the stats endpoint * Improve documentation -- Antoine R. Dumont (@ardumont) Mon, 10 Sep 2018 11:36:27 +0200 swh-storage (0.0.104-1~swh1) unstable-swh; urgency=medium * version 0.0.104 -- Antoine Lambert Wed, 29 Aug 2018 15:55:37 +0200 swh-storage (0.0.103-1~swh1) unstable-swh; urgency=medium * v0.0.103 * swh.storage.storage: origin_add returns updated list of dict with id -- Antoine R. Dumont (@ardumont) Mon, 30 Jul 2018 11:47:53 +0200 swh-storage (0.0.102-1~swh1) unstable-swh; urgency=medium * Release swh-storage v0.0.102 * Stop using temporary tables for read-only queries * Add timeouts for some read-only queries -- Nicolas Dandrimont Tue, 05 Jun 2018 14:06:54 +0200 swh-storage (0.0.101-1~swh1) unstable-swh; urgency=medium * v0.0.101 * swh.storage.api.client: Permit to specify the query timeout option -- Antoine R. Dumont (@ardumont) Thu, 24 May 2018 12:13:51 +0200 swh-storage (0.0.100-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.100 * remote api: only instantiate storage once per import * add thread-awareness to the storage implementation * properly cleanup after tests * parallelize objstorage and storage additions -- Nicolas Dandrimont Sat, 12 May 2018 18:12:40 +0200 swh-storage (0.0.99-1~swh1) unstable-swh; urgency=medium * v0.0.99 * storage: Add methods to compute directories/revisions diff * Add a new table for "bucketed" object counts * doc: update table clusters in SQL diagram * swh.storage.content_missing: Improve docstring -- Antoine R. Dumont (@ardumont) Tue, 20 Feb 2018 13:32:25 +0100 swh-storage (0.0.98-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.98 * Switch backwards compatibility for snapshots off -- Nicolas Dandrimont Tue, 06 Feb 2018 15:27:15 +0100 swh-storage (0.0.97-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.97 * refactor database initialization * use a separate thread instead of a temporary file for COPY operations * add more snapshot-related endpoints -- Nicolas Dandrimont Tue, 06 Feb 2018 14:07:07 +0100 swh-storage (0.0.96-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.96 * Add snapshot models * Add support for hg revision type -- Nicolas Dandrimont Tue, 19 Dec 2017 16:25:57 +0100 swh-storage (0.0.95-1~swh1) unstable-swh; urgency=medium * v0.0.95 * swh.storage: Rename indexer_configuration to tool * swh.storage: Migrate indexer model to its own model -- Antoine R. 
Dumont (@ardumont) Thu, 07 Dec 2017 09:56:31 +0100 swh-storage (0.0.94-1~swh1) unstable-swh; urgency=medium * v0.0.94 * Open searching origins methods to storage -- Antoine R. Dumont (@ardumont) Tue, 05 Dec 2017 12:32:57 +0100 swh-storage (0.0.93-1~swh1) unstable-swh; urgency=medium * v0.0.93 * swh.storage: Open indexer_configuration_add endpoint * swh-data: Update content mimetype indexer configuration * origin_visit_get: make order repeatable * db: Make unique indices actually unique and vice versa * Add origin_metadata endpoints (add, get, etc...) * cleanup: Remove unused content provenance cache tables -- Antoine R. Dumont (@ardumont) Fri, 24 Nov 2017 11:14:11 +0100 swh-storage (0.0.92-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.92 * make swh.storage.schemata work on SQLAlchemy 1.0 -- Nicolas Dandrimont Thu, 12 Oct 2017 19:51:24 +0200 swh-storage (0.0.91-1~swh1) unstable-swh; urgency=medium * Release swh.storage version 0.0.91 * Update packaging runes -- Nicolas Dandrimont Thu, 12 Oct 2017 18:41:46 +0200 swh-storage (0.0.90-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.90 * Remove leaky dependency on python3-kafka -- Nicolas Dandrimont Wed, 11 Oct 2017 18:53:22 +0200 swh-storage (0.0.89-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.89 * Add new package for ancillary schemata * Add new metadata-related entry points * Update for new swh.model -- Nicolas Dandrimont Wed, 11 Oct 2017 17:39:29 +0200 swh-storage (0.0.88-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.88 * Move the archiver to its own module * Prepare building for stretch -- Nicolas Dandrimont Fri, 30 Jun 2017 14:52:12 +0200 swh-storage (0.0.87-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.87 * update tasks to new swh.scheduler api -- Nicolas Dandrimont Mon, 12 Jun 2017 17:54:11 +0200 swh-storage (0.0.86-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.86 * archiver updates -- Nicolas Dandrimont Tue, 06 Jun 2017 18:43:43 +0200 swh-storage (0.0.85-1~swh1) unstable-swh; urgency=medium * v0.0.85 * Improve license endpoint's unknown license policy -- Antoine R. Dumont (@ardumont) Tue, 06 Jun 2017 17:55:40 +0200 swh-storage (0.0.84-1~swh1) unstable-swh; urgency=medium * v0.0.84 * Update indexer endpoints to use indexer configuration id * Add indexer configuration endpoint -- Antoine R. Dumont (@ardumont) Fri, 02 Jun 2017 16:16:47 +0200 swh-storage (0.0.83-1~swh1) unstable-swh; urgency=medium * v0.0.83 * Add blake2s256 new hash computation on content -- Antoine R. Dumont (@ardumont) Fri, 31 Mar 2017 12:27:09 +0200 swh-storage (0.0.82-1~swh1) unstable-swh; urgency=medium * v0.0.82 * swh.storage.listener: Subscribe to new origin notifications * sql/swh-func: improve equality check on the three columns for swh_content_missing * swh.storage: add length to directory listing primitives * refactoring: Migrate from swh.core.hashutil to swh.model.hashutil * swh.storage.archiver.updater: Create a content updater journal client * vault: add a git fast-import cooker * vault: generic cache to allow multiple cooker types and formats -- Antoine R. 
Dumont (@ardumont) Tue, 21 Mar 2017 14:50:16 +0100 swh-storage (0.0.81-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.81 * archiver improvements for mass injection in azure -- Nicolas Dandrimont Thu, 09 Mar 2017 11:15:28 +0100 swh-storage (0.0.80-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.80 * archiver improvements related to the mass injection of contents in azure * updates to the vault cooker -- Nicolas Dandrimont Tue, 07 Mar 2017 15:12:35 +0100 swh-storage (0.0.79-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.79 * archiver: keep counts of objects in each archive * converters: normalize timestamps using swh.model -- Nicolas Dandrimont Tue, 14 Feb 2017 19:37:36 +0100 swh-storage (0.0.78-1~swh1) unstable-swh; urgency=medium * v0.0.78 * Refactoring some common code into swh.core + adaptation api calls in * swh.objstorage and swh.storage (storage and vault) -- Antoine R. Dumont (@ardumont) Thu, 26 Jan 2017 15:08:03 +0100 swh-storage (0.0.77-1~swh1) unstable-swh; urgency=medium * v0.0.77 * Paginate results for origin_visits endpoint -- Antoine R. Dumont (@ardumont) Thu, 19 Jan 2017 14:41:49 +0100 swh-storage (0.0.76-1~swh1) unstable-swh; urgency=medium * v0.0.76 * Unify storage and objstorage configuration and instantiation functions -- Antoine R. Dumont (@ardumont) Thu, 15 Dec 2016 18:25:58 +0100 swh-storage (0.0.75-1~swh1) unstable-swh; urgency=medium * v0.0.75 * Add information on indexer tools (T610) -- Antoine R. Dumont (@ardumont) Fri, 02 Dec 2016 18:21:36 +0100 swh-storage (0.0.74-1~swh1) unstable-swh; urgency=medium * v0.0.74 * Use strict equality for content ctags' symbols search -- Antoine R. Dumont (@ardumont) Tue, 29 Nov 2016 17:25:29 +0100 swh-storage (0.0.73-1~swh1) unstable-swh; urgency=medium * v0.0.73 * Improve ctags search query for edge cases -- Antoine R. Dumont (@ardumont) Mon, 28 Nov 2016 16:34:55 +0100 swh-storage (0.0.72-1~swh1) unstable-swh; urgency=medium * v0.0.72 * Permit pagination on content_ctags_search api endpoint -- Antoine R. Dumont (@ardumont) Thu, 24 Nov 2016 14:19:29 +0100 swh-storage (0.0.71-1~swh1) unstable-swh; urgency=medium * v0.0.71 * Open full-text search endpoint on ctags -- Antoine R. Dumont (@ardumont) Wed, 23 Nov 2016 17:33:51 +0100 swh-storage (0.0.70-1~swh1) unstable-swh; urgency=medium * v0.0.70 * Add new license endpoints (add/get) * Update ctags endpoints to align update conflict policy -- Antoine R. Dumont (@ardumont) Thu, 10 Nov 2016 17:27:49 +0100 swh-storage (0.0.69-1~swh1) unstable-swh; urgency=medium * v0.0.69 * storage: Open ctags entry points (missing, add, get) * storage: allow adding several origins at once -- Antoine R. Dumont (@ardumont) Thu, 20 Oct 2016 16:07:07 +0200 swh-storage (0.0.68-1~swh1) unstable-swh; urgency=medium * v0.0.68 * indexer: Open mimetype/language get endpoints * indexer: Add the mimetype/language add function with conflict_update flag * archiver: Extend worker-to-backend to transmit messages to another * queue (once done) -- Antoine R. Dumont (@ardumont) Thu, 13 Oct 2016 15:30:21 +0200 swh-storage (0.0.67-1~swh1) unstable-swh; urgency=medium * v0.0.67 * Fix provenance storage init function -- Antoine R. Dumont (@ardumont) Wed, 12 Oct 2016 02:24:12 +0200 swh-storage (0.0.66-1~swh1) unstable-swh; urgency=medium * v0.0.66 * Improve provenance configuration format -- Antoine R. 
Dumont (@ardumont) Wed, 12 Oct 2016 01:39:26 +0200 swh-storage (0.0.65-1~swh1) unstable-swh; urgency=medium * v0.0.65 * Open api entry points for swh.indexer about content mimetype and * language * Update schema graph to latest version -- Antoine R. Dumont (@ardumont) Sat, 08 Oct 2016 10:00:30 +0200 swh-storage (0.0.64-1~swh1) unstable-swh; urgency=medium * v0.0.64 * Fix: Missing incremented version 5 for archiver.dbversion * Retrieve information on a content cached * sql/swh-func: content cache populates lines in deterministic order -- Antoine R. Dumont (@ardumont) Thu, 29 Sep 2016 21:50:59 +0200 swh-storage (0.0.63-1~swh1) unstable-swh; urgency=medium * v0.0.63 * Make the 'worker to backend' destination agnostic (message parameter) * Improve 'unknown sha1' policy (archiver db can lag behind swh db) * Improve 'force copy' policy -- Antoine R. Dumont (@ardumont) Fri, 23 Sep 2016 12:29:50 +0200 swh-storage (0.0.62-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.62 * Updates to the provenance cache to reduce churn on the main tables -- Nicolas Dandrimont Thu, 22 Sep 2016 18:54:52 +0200 swh-storage (0.0.61-1~swh1) unstable-swh; urgency=medium * v0.0.61 * Handle copies of unregistered sha1 in archiver db * Fix copy to only the targeted destination * Update to latest python3-swh.core dependency -- Antoine R. Dumont (@ardumont) Thu, 22 Sep 2016 13:44:05 +0200 swh-storage (0.0.60-1~swh1) unstable-swh; urgency=medium * v0.0.60 * Update archiver dependencies -- Antoine R. Dumont (@ardumont) Tue, 20 Sep 2016 16:46:48 +0200 swh-storage (0.0.59-1~swh1) unstable-swh; urgency=medium * v0.0.59 * Unify configuration property between director/worker * Deal with potential missing contents in the archiver db * Improve get_contents_error implementation * Remove dead code in swh.storage.db about archiver -- Antoine R. Dumont (@ardumont) Sat, 17 Sep 2016 12:50:14 +0200 swh-storage (0.0.58-1~swh1) unstable-swh; urgency=medium * v0.0.58 * ArchiverDirectorToBackend reads sha1 from stdin and sends chunks of sha1 * for archival. -- Antoine R. Dumont (@ardumont) Fri, 16 Sep 2016 22:17:14 +0200 swh-storage (0.0.57-1~swh1) unstable-swh; urgency=medium * v0.0.57 * Update swh.storage.archiver -- Antoine R. Dumont (@ardumont) Thu, 15 Sep 2016 16:30:11 +0200 swh-storage (0.0.56-1~swh1) unstable-swh; urgency=medium * v0.0.56 * Vault: Add vault implementation (directory cooker & cache * implementation + its api) * Archiver: Add another archiver implementation (direct to backend) -- Antoine R. Dumont (@ardumont) Thu, 15 Sep 2016 10:56:35 +0200 swh-storage (0.0.55-1~swh1) unstable-swh; urgency=medium * v0.0.55 * Fix origin_visit endpoint -- Antoine R. Dumont (@ardumont) Thu, 08 Sep 2016 15:21:28 +0200 swh-storage (0.0.54-1~swh1) unstable-swh; urgency=medium * v0.0.54 * Open origin_visit_get_by entry point -- Antoine R. Dumont (@ardumont) Mon, 05 Sep 2016 12:36:34 +0200 swh-storage (0.0.53-1~swh1) unstable-swh; urgency=medium * v0.0.53 * Add cache about content provenance * debian: fix python3-swh.storage.archiver runtime dependency * debian: create new package python3-swh.storage.provenance -- Antoine R. Dumont (@ardumont) Fri, 02 Sep 2016 11:14:09 +0200 swh-storage (0.0.52-1~swh1) unstable-swh; urgency=medium * v0.0.52 * Package python3-swh.storage.archiver -- Antoine R. 
Dumont (@ardumont) Thu, 25 Aug 2016 14:55:23 +0200 swh-storage (0.0.51-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.51 * Add new metadata column to origin_visit * Update swh-add-directory script for updated API -- Nicolas Dandrimont Wed, 24 Aug 2016 14:36:03 +0200 swh-storage (0.0.50-1~swh1) unstable-swh; urgency=medium * v0.0.50 * Add a function to pull (only) metadata for a list of contents * Update occurrence_add api entry point to properly deal with origin_visit * Add origin_visit api entry points to create/update origin_visit -- Antoine R. Dumont (@ardumont) Tue, 23 Aug 2016 16:29:26 +0200 swh-storage (0.0.49-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.49 * Proper dependency on python3-kafka -- Nicolas Dandrimont Fri, 19 Aug 2016 13:45:52 +0200 swh-storage (0.0.48-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.48 * Updates to the archiver * Notification support for new object creations -- Nicolas Dandrimont Fri, 19 Aug 2016 12:13:50 +0200 swh-storage (0.0.47-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.47 * Update storage archiver to new schemaless schema -- Nicolas Dandrimont Fri, 22 Jul 2016 16:59:19 +0200 swh-storage (0.0.46-1~swh1) unstable-swh; urgency=medium * v0.0.46 * Update archiver bootstrap -- Antoine R. Dumont (@ardumont) Wed, 20 Jul 2016 19:04:42 +0200 swh-storage (0.0.45-1~swh1) unstable-swh; urgency=medium * v0.0.45 * Separate swh.storage.archiver's db from swh.storage.storage -- Antoine R. Dumont (@ardumont) Tue, 19 Jul 2016 15:05:36 +0200 swh-storage (0.0.44-1~swh1) unstable-swh; urgency=medium * v0.0.44 * Open listing visits per origin api -- Quentin Campos Fri, 08 Jul 2016 11:27:10 +0200 swh-storage (0.0.43-1~swh1) unstable-swh; urgency=medium * v0.0.43 * Extract objstorage to its own package swh.objstorage -- Quentin Campos Mon, 27 Jun 2016 14:57:12 +0200 swh-storage (0.0.42-1~swh1) unstable-swh; urgency=medium * Add an object storage multiplexer to allow transition between multiple versions of object storages. -- Quentin Campos Tue, 21 Jun 2016 15:03:52 +0200 swh-storage (0.0.41-1~swh1) unstable-swh; urgency=medium * Refactoring of the object storage in order to allow multiple versions of it, as well as a multiplexer for version transition. -- Quentin Campos Thu, 16 Jun 2016 15:54:16 +0200 swh-storage (0.0.40-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.40: * Refactor objstorage to allow for different implementations * Updates to the checker functionality * Bump swh.core dependency to v0.0.20 -- Nicolas Dandrimont Tue, 14 Jun 2016 17:25:42 +0200 swh-storage (0.0.39-1~swh1) unstable-swh; urgency=medium * v0.0.39 * Add run_from_webserver function for objstorage api server * Add unique identifier message on default api server route endpoints -- Antoine R. 
Dumont (@ardumont) Fri, 20 May 2016 15:27:34 +0200 swh-storage (0.0.38-1~swh1) unstable-swh; urgency=medium * v0.0.38 * Add an http api for object storage * Implement an archiver to perform backup copies -- Quentin Campos Fri, 20 May 2016 14:40:14 +0200 swh-storage (0.0.37-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.37 * Add fullname to person table * Add svn as a revision type -- Nicolas Dandrimont Fri, 08 Apr 2016 16:44:24 +0200 swh-storage (0.0.36-1~swh1) unstable-swh; urgency=medium * Release swh.storage v0.0.36 * Add json-schema documentation for the jsonb fields * Overhaul entity handling -- Nicolas Dandrimont Wed, 16 Mar 2016 17:27:17 +0100 swh-storage (0.0.35-1~swh1) unstable-swh; urgency=medium * Release swh-storage v0.0.35 * Factor in temporary tables with only an id (db v059) * Allow generic object search by sha1_git (db v060) -- Nicolas Dandrimont Thu, 25 Feb 2016 16:21:01 +0100 swh-storage (0.0.34-1~swh1) unstable-swh; urgency=medium * Release swh.storage version 0.0.34 * occurrence improvements * commit metadata improvements -- Nicolas Dandrimont Fri, 19 Feb 2016 18:20:07 +0100 swh-storage (0.0.33-1~swh1) unstable-swh; urgency=medium * Bump swh.storage to version 0.0.33 -- Nicolas Dandrimont Fri, 05 Feb 2016 11:17:00 +0100 swh-storage (0.0.32-1~swh1) unstable-swh; urgency=medium * v0.0.32 * Let the person's id flow * sql/upgrades/051: 050->051 schema change * sql/upgrades/050: 049->050 schema change - Clean up obsolete functions * sql/upgrades/049: Final take for 048->049 schema change. * sql: Use a new schema for occurrences -- Antoine R. Dumont (@ardumont) Fri, 29 Jan 2016 17:44:27 +0100 swh-storage (0.0.31-1~swh1) unstable-swh; urgency=medium * v0.0.31 * Deal with occurrence_history.branch, occurrence.branch, release.name as bytes -- Antoine R. Dumont (@ardumont) Wed, 27 Jan 2016 15:45:53 +0100 swh-storage (0.0.30-1~swh1) unstable-swh; urgency=medium * Prepare swh.storage v0.0.30 release * type-agnostic occurrences and revisions -- Nicolas Dandrimont Tue, 26 Jan 2016 07:36:43 +0100 swh-storage (0.0.29-1~swh1) unstable-swh; urgency=medium * v0.0.29 * New: * Upgrade sql schema to 041→043 * Deal with communication downtime between clients and storage * Open occurrence_get(origin_id) to retrieve latest occurrences per origin * Open release_get_by to retrieve a release by origin * Open directory_get to retrieve information on directory by id * Open entity_get to retrieve information on entity + hierarchy from its uuid * Open directory_get that retrieve information on directory per id * Update: * directory_get/directory_ls: Rename to directory_ls * revision_log: update to retrieve logs from multiple root revisions * revision_get_by: branch name filtering is now optional -- Antoine R. Dumont (@ardumont) Wed, 20 Jan 2016 16:15:50 +0100 swh-storage (0.0.28-1~swh1) unstable-swh; urgency=medium * v0.0.28 * Open entity_get api -- Antoine R. Dumont (@ardumont) Fri, 15 Jan 2016 16:37:27 +0100 swh-storage (0.0.27-1~swh1) unstable-swh; urgency=medium * v0.0.27 * Open directory_entry_get_by_path api * Improve get_revision_by api performance * sql/swh-schema: add index on origin(type, url) --> improve origin lookup api * Bump to 039 db version -- Antoine R. Dumont (@ardumont) Fri, 15 Jan 2016 12:42:47 +0100 swh-storage (0.0.26-1~swh1) unstable-swh; urgency=medium * v0.0.26 * Open revision_get_by to retrieve a revision by occurrence criterion filtering * sql/upgrades/036: add 035→036 upgrade script -- Antoine R. 
Dumont (@ardumont) Wed, 13 Jan 2016 12:46:44 +0100 swh-storage (0.0.25-1~swh1) unstable-swh; urgency=medium * v0.0.25 * Limit results in swh_revision_list* * Create the package to align the current db production version on https://archive.softwareheritage.org/ -- Antoine R. Dumont (@ardumont) Fri, 08 Jan 2016 11:33:08 +0100 swh-storage (0.0.24-1~swh1) unstable-swh; urgency=medium * Prepare swh.storage release v0.0.24 * Add a limit argument to revision_log -- Nicolas Dandrimont Wed, 06 Jan 2016 15:12:53 +0100 swh-storage (0.0.23-1~swh1) unstable-swh; urgency=medium * v0.0.23 * Protect against overflow, wrapped in ValueError for client * Fix relative path import for remote storage. * api to retrieve revision_log is now 'parents' aware -- Antoine R. Dumont (@ardumont) Wed, 06 Jan 2016 11:30:58 +0100 swh-storage (0.0.22-1~swh1) unstable-swh; urgency=medium * Release v0.0.22 * Fix relative import for remote storage -- Nicolas Dandrimont Wed, 16 Dec 2015 16:04:48 +0100 swh-storage (0.0.21-1~swh1) unstable-swh; urgency=medium * Prepare release v0.0.21 * Protect the storage api client from overflows * Add a get_storage function mapping to local or remote storage -- Nicolas Dandrimont Wed, 16 Dec 2015 13:34:46 +0100 swh-storage (0.0.20-1~swh1) unstable-swh; urgency=medium * v0.0.20 * allow numeric timestamps with offset * Open revision_log api * start migration to swh.model -- Antoine R. Dumont (@ardumont) Mon, 07 Dec 2015 15:20:36 +0100 swh-storage (0.0.19-1~swh1) unstable-swh; urgency=medium * v0.0.19 * Improve directory listing with content data * Open person_get * Open release_get data reading * Improve origin_get api * Effort to unify api output on dict (for read) * Migrate backend to 032 -- Antoine R. Dumont (@ardumont) Fri, 27 Nov 2015 13:33:34 +0100 swh-storage (0.0.18-1~swh1) unstable-swh; urgency=medium * v0.0.18 * Improve origin_get to permit retrieval per id * Update directory_get implementation (add join from * directory_entry_file to content) * Open release_get : [sha1] -> [Release] -- Antoine R. Dumont (@ardumont) Thu, 19 Nov 2015 11:18:35 +0100 swh-storage (0.0.17-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.storage v0.0.17 * Add some entity related entry points -- Nicolas Dandrimont Tue, 03 Nov 2015 16:40:59 +0100 swh-storage (0.0.16-1~swh1) unstable-swh; urgency=medium * v0.0.16 * Add metadata column in revision (db version 29) * cache http connection for remote storage client -- Antoine R. 
Dumont (@ardumont) Thu, 29 Oct 2015 10:29:00 +0100 swh-storage (0.0.15-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.storage v0.0.15 * Allow population of fetch_history * Update organizations / projects as entities * Use schema v028 for directory addition -- Nicolas Dandrimont Tue, 27 Oct 2015 11:43:39 +0100 swh-storage (0.0.14-1~swh1) unstable-swh; urgency=medium * Prepare swh.storage v0.0.14 deployment -- Nicolas Dandrimont Fri, 16 Oct 2015 15:34:08 +0200 swh-storage (0.0.13-1~swh1) unstable-swh; urgency=medium * Prepare deploying swh.storage v0.0.13 -- Nicolas Dandrimont Fri, 16 Oct 2015 14:51:44 +0200 swh-storage (0.0.12-1~swh1) unstable-swh; urgency=medium * Prepare deploying swh.storage v0.0.12 -- Nicolas Dandrimont Tue, 13 Oct 2015 12:39:18 +0200 swh-storage (0.0.11-1~swh1) unstable-swh; urgency=medium * Preparing deployment of swh.storage v0.0.11 -- Nicolas Dandrimont Fri, 09 Oct 2015 17:44:51 +0200 swh-storage (0.0.10-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.storage v0.0.10 -- Nicolas Dandrimont Tue, 06 Oct 2015 17:37:00 +0200 swh-storage (0.0.9-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.storage v0.0.9 -- Nicolas Dandrimont Thu, 01 Oct 2015 19:03:00 +0200 swh-storage (0.0.8-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.storage v0.0.8 -- Nicolas Dandrimont Thu, 01 Oct 2015 11:32:46 +0200 swh-storage (0.0.7-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.storage v0.0.7 -- Nicolas Dandrimont Tue, 29 Sep 2015 16:52:54 +0200 swh-storage (0.0.6-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.storage v0.0.6 -- Nicolas Dandrimont Tue, 29 Sep 2015 16:43:24 +0200 swh-storage (0.0.5-1~swh1) unstable-swh; urgency=medium * Prepare deploying swh.storage v0.0.5 -- Nicolas Dandrimont Tue, 29 Sep 2015 16:27:00 +0200 swh-storage (0.0.1-1~swh1) unstable-swh; urgency=medium * Initial release * swh.storage.api: Properly escape arbitrary byte sequences in arguments -- Nicolas Dandrimont Tue, 22 Sep 2015 17:02:34 +0200 diff --git a/debian/control b/debian/control index ddb1279e..02881126 100644 --- a/debian/control +++ b/debian/control @@ -1,55 +1,55 @@ Source: swh-storage Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python (>= 2), cassandra, openjdk-11-jre, python3-aiohttp, python3-all, python3-cassandra, python3-click, python3-dateutil, python3-flask, python3-hypothesis (>= 3.11.0~), python3-kafka, python3-mypy-extensions, python3-psycopg2 (>= 2.8), python3-pytest, python3-pytest-mock, python3-pytest-redis, python3-requests, python3-setuptools, python3-setuptools-scm, python3-sqlalchemy (>= 1.0), - python3-swh.core (>= 0.14), + python3-swh.core (>= 2), python3-swh.counters (>= 0.8), python3-swh.journal (>= 0.9), - python3-swh.model (>= 2.1), + python3-swh.model (>= 4.4), python3-swh.objstorage (>= 0.2.2), - python3-swh.core.db.pytestplugin (>= 0.14), + python3-swh.core.db.pytestplugin (>= 2), python3-typing-extensions (>= 3.7.4~), - python3-tenacity, + python3-tenacity (>= 6.2), redis-server # Only the jre 11 is supported with cassandra. Unfortunately, some other jre packages # are pulled, so we prevent those from being installed. 
# Related to https://forge.softwareheritage.org/T3053#58819 Build-Conflicts: openjdk-17-jre-headless, openjdk-16-jre-headless, openjdk-15-jre-headless, Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/DSTO/ Package: python3-swh.storage Architecture: all -Depends: python3-swh.core (>= 0.9), - python3-swh.model (>= 0.4), - python3-swh.objstorage (>= 0.0.40~), +Depends: python3-swh.core (>= 2), + python3-swh.model (>= 4.4), + python3-swh.objstorage (>= 0.2.2), python3-psycopg2 (>= 2.8), ${misc:Depends}, ${python3:Depends} Breaks: python3-swh.archiver (<< 0.0.4~), python3-swh.indexer (<< 0.0.51~), python3-swh.vault (<< 0.0.19~) Description: Software Heritage storage utilities diff --git a/requirements-swh.txt b/requirements-swh.txt index 8d375057..bacba9c8 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,4 +1,4 @@ -swh.core[db,http] >= 0.14.0 +swh.core[db,http] >= 2 swh.counters >= v0.8.0 swh.model >= 4.4.0 swh.objstorage >= 0.2.2 diff --git a/requirements-test.txt b/requirements-test.txt index a33e143a..44d0d3f2 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,16 +1,17 @@ hypothesis >= 3.11.0 pytest < 7.0.0 # v7.0.0 removed _pytest.tmpdir.TempdirFactory, which is used by some of the pytest plugins we use pytest-mock # pytz is in fact a dep of swh.model[testing] and should not be necessary, but # the dep on swh.model in the main requirements-swh.txt file shadows this one # adding the [testing] extra. swh.model[testing] >= 0.0.50 pytz pytest-redis pytest-xdist types-python-dateutil types-pytz types-pyyaml types-redis types-requests +types-toml diff --git a/swh.storage.egg-info/PKG-INFO b/swh.storage.egg-info/PKG-INFO index a4dc5adc..b0e720db 100644 --- a/swh.storage.egg-info/PKG-INFO +++ b/swh.storage.egg-info/PKG-INFO @@ -1,250 +1,250 @@ Metadata-Version: 2.1 Name: swh.storage -Version: 0.43.1 +Version: 1.0.0 Summary: Software Heritage storage manager Home-page: https://forge.softwareheritage.org/diffusion/DSTO/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-storage Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-storage/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: journal License-File: LICENSE License-File: AUTHORS swh-storage =========== Abstraction layer over the archive, allowing to access all stored source code artifacts as well as their metadata. See the [documentation](https://docs.softwareheritage.org/devel/swh-storage/index.html) for more details. ## Quick start ### Dependencies Python tests for this module include tests that cannot be run without a local Postgresql database, so you need the Postgresql server executable on your machine (no need to have a running Postgresql server). They also expect a cassandra server. 
#### Debian-like host

```
$ sudo apt install libpq-dev postgresql-11 cassandra
```

#### Non Debian-like host

The tests expect the `cassandra` executable either to be left unspecified, in which case it is looked up at `/usr/sbin/cassandra`, or to be given explicitly through the `SWH_CASSANDRA_BIN` environment variable (e.g. `export SWH_CASSANDRA_BIN=/path/to/cassandra`).

Optionally, you can skip the cassandra tests altogether:

```
(swh) :~/swh-storage$ tox -- -m 'not cassandra'
```

### Installation

It is strongly recommended to use a virtualenv. In the following, we assume you work in a virtualenv named `swh`. See the [developer setup guide](https://docs.softwareheritage.org/devel/developer-setup.html#developer-setup) for more details on how to set up a working environment.

You can install the package directly from [pypi](https://pypi.org/p/swh.storage):

```
(swh) :~$ pip install swh.storage
[...]
```

Or from sources:

```
(swh) :~$ git clone https://forge.softwareheritage.org/source/swh-storage.git
[...]
(swh) :~$ cd swh-storage
(swh) :~/swh-storage$ pip install .
[...]
```

Then you can check it is properly installed:

```
(swh) :~$ swh storage --help
Usage: swh storage [OPTIONS] COMMAND [ARGS]...

  Software Heritage Storage tools.

Options:
  -h, --help  Show this message and exit.

Commands:
  rpc-serve  Software Heritage Storage RPC server.
```

## Tests

The best way of running Python tests for this module is to use [tox](https://tox.readthedocs.io/).

```
(swh) :~$ pip install tox
```

### tox

From the sources directory, simply use tox:

```
(swh) :~/swh-storage$ tox
[...]
========= 315 passed, 6 skipped, 15 warnings in 40.86 seconds ==========
_______________________________ summary ________________________________
flake8: commands succeeded
py3: commands succeeded
congratulations :)
```

Note: it is possible to set the `JAVA_HOME` environment variable to specify the JVM version used by Cassandra. For example, at the time of writing, Cassandra does not support java 14, so one may want to use java 11 instead:

```
(swh) :~/swh-storage$ export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
(swh) :~/swh-storage$ tox
[...]
```

## Development

The storage server can be started locally. It requires a configuration file and a running Postgresql database.

### Sample configuration

A typical `storage.yml` configuration file is:

```
storage:
  cls: postgresql
  db: "dbname=softwareheritage-dev user= password="
  objstorage:
    cls: pathslicing
    root: /tmp/swh-storage/
    slicing: 0:2/2:4/4:6
```

This configuration uses:

- a local (postgresql) storage instance whose db connection goes to the local `softwareheritage-dev` database,
- a local objstorage instance whose:
  - `root` path is /tmp/swh-storage,
  - slicing scheme is `0:2/2:4/4:6`: a content is stored on disk under a path derived from its identifier (sha1), where the first directory level is the first 2 hex characters, the second level the next 2, the third level the next 2, and finally a file named after the complete hash holds the raw content.

For example, the content with sha1 00062f8bd330715c4f819373653d97b3cd34394c will be stored at 00/06/2f/00062f8bd330715c4f819373653d97b3cd34394c.

Note that the `root` path should exist on disk before starting the server.

### Starting the storage server

If the python package has been properly installed (e.g. in a virtualenv), you should be able to use the command:

```
(swh) :~/swh-storage$ swh storage rpc-serve storage.yml
```

This runs a local swh-storage API server on port 5002.

```
(swh) :~/swh-storage$ curl http://127.0.0.1:5002
Software Heritage storage server

You have reached the Software Heritage storage server.
See its documentation and API for more information
```
``` ### And then what? In your upper layer ([loader-git](https://forge.softwareheritage.org/source/swh-loader-git/), [loader-svn](https://forge.softwareheritage.org/source/swh-loader-svn/), etc...), you can define a remote storage with this snippet of yaml configuration. ``` storage: cls: remote url: http://localhost:5002/ ``` You could directly define a postgresql storage with the following snippet: ``` storage: cls: postgresql db: service=swh-dev objstorage: cls: pathslicing root: /home/storage/swh-storage/ slicing: 0:2/2:4/4:6 ``` ## Cassandra As an alternative to PostgreSQL, swh-storage can use Cassandra as a database backend. It can be used like this: ``` storage: cls: cassandra hosts: - localhost objstorage: cls: pathslicing root: /home/storage/swh-storage/ slicing: 0:2/2:4/4:6 ``` The Cassandra swh-storage implementation supports both Cassandra >= 4.0-alpha2 and ScyllaDB >= 4.4 (and possibly earlier versions, but this is untested). While the main code supports both transparently, running tests or configuring the schema requires specific code when using ScyllaDB, enabled by setting the `SWH_USE_SCYLLADB=1` environment variable. diff --git a/swh.storage.egg-info/SOURCES.txt b/swh.storage.egg-info/SOURCES.txt index 3f465599..bf07ad3b 100644 --- a/swh.storage.egg-info/SOURCES.txt +++ b/swh.storage.egg-info/SOURCES.txt @@ -1,337 +1,337 @@ .gitignore .pre-commit-config.yaml AUTHORS CODE_OF_CONDUCT.md CONTRIBUTORS LICENSE MANIFEST.in Makefile Makefile.local README.md conftest.py mypy.ini pyproject.toml pytest.ini requirements-swh-journal.txt requirements-swh.txt requirements-test.txt requirements.txt setup.cfg setup.py tox.ini ./requirements-swh-journal.txt ./requirements-swh.txt ./requirements-test.txt ./requirements.txt bin/swh-storage-add-dir docs/.gitignore docs/Makefile docs/Makefile.local docs/archive-copies.rst docs/cli.rst docs/conf.py docs/extrinsic-metadata-specification.rst docs/index.rst docs/sql-storage.rst docs/_static/.placeholder docs/_templates/.placeholder docs/images/.gitignore docs/images/Makefile docs/images/swh-archive-copies.dia sql/.gitignore sql/Makefile sql/TODO sql/clusters.dot sql/bin/db-upgrade sql/bin/dot_add_content sql/doc/json sql/doc/json/.gitignore sql/doc/json/Makefile sql/doc/json/entity.lister_metadata.schema.json sql/doc/json/entity.metadata.schema.json sql/doc/json/entity_history.lister_metadata.schema.json sql/doc/json/entity_history.metadata.schema.json sql/doc/json/fetch_history.result.schema.json sql/doc/json/list_history.result.schema.json sql/doc/json/listable_entity.list_params.schema.json sql/doc/json/origin_visit.metadata.json sql/doc/json/tool.tool_configuration.schema.json sql/json/.gitignore sql/json/Makefile sql/json/entity.lister_metadata.schema.json sql/json/entity.metadata.schema.json sql/json/entity_history.lister_metadata.schema.json sql/json/entity_history.metadata.schema.json sql/json/fetch_history.result.schema.json sql/json/list_history.result.schema.json sql/json/listable_entity.list_params.schema.json sql/json/origin_visit.metadata.json sql/json/tool.tool_configuration.schema.json -sql/upgrades/015.sql -sql/upgrades/016.sql -sql/upgrades/017.sql -sql/upgrades/018.sql -sql/upgrades/019.sql -sql/upgrades/020.sql -sql/upgrades/021.sql -sql/upgrades/022.sql -sql/upgrades/023.sql -sql/upgrades/024.sql -sql/upgrades/025.sql -sql/upgrades/026.sql -sql/upgrades/027.sql -sql/upgrades/028.sql -sql/upgrades/029.sql -sql/upgrades/030.sql -sql/upgrades/032.sql -sql/upgrades/033.sql -sql/upgrades/034.sql -sql/upgrades/035.sql 
-sql/upgrades/036.sql -sql/upgrades/037.sql -sql/upgrades/038.sql -sql/upgrades/039.sql -sql/upgrades/040.sql -sql/upgrades/041.sql -sql/upgrades/042.sql -sql/upgrades/043.sql -sql/upgrades/044.sql -sql/upgrades/045.sql -sql/upgrades/046.sql -sql/upgrades/047.sql -sql/upgrades/048.sql -sql/upgrades/049.sql -sql/upgrades/050.sql -sql/upgrades/051.sql -sql/upgrades/052.sql -sql/upgrades/053.sql -sql/upgrades/054.sql -sql/upgrades/055.sql -sql/upgrades/056.sql -sql/upgrades/057.sql -sql/upgrades/058.sql -sql/upgrades/059.sql -sql/upgrades/060.sql -sql/upgrades/061.sql -sql/upgrades/062.sql -sql/upgrades/063.sql -sql/upgrades/064.sql -sql/upgrades/065.sql -sql/upgrades/066.sql -sql/upgrades/067.sql -sql/upgrades/068.sql -sql/upgrades/069.sql -sql/upgrades/070.sql -sql/upgrades/071.sql -sql/upgrades/072.sql -sql/upgrades/073.sql -sql/upgrades/074.sql -sql/upgrades/075.sql -sql/upgrades/076.sql -sql/upgrades/077.sql -sql/upgrades/078.sql -sql/upgrades/079.sql -sql/upgrades/080.sql -sql/upgrades/081.sql -sql/upgrades/082.sql -sql/upgrades/083.sql -sql/upgrades/084.sql -sql/upgrades/085.sql -sql/upgrades/086.sql -sql/upgrades/087.sql -sql/upgrades/088.sql -sql/upgrades/089.sql -sql/upgrades/090.sql -sql/upgrades/091.sql -sql/upgrades/092.sql -sql/upgrades/093.sql -sql/upgrades/094.sql -sql/upgrades/095.sql -sql/upgrades/096.sql -sql/upgrades/097.sql -sql/upgrades/098.sql -sql/upgrades/099.sql -sql/upgrades/100.sql -sql/upgrades/101.sql -sql/upgrades/102.sql -sql/upgrades/103.sql -sql/upgrades/104.sql -sql/upgrades/105.sql -sql/upgrades/106.sql -sql/upgrades/107.sql -sql/upgrades/108.sql -sql/upgrades/109.sql -sql/upgrades/110.sql -sql/upgrades/111.sql -sql/upgrades/112.sql -sql/upgrades/113.sql -sql/upgrades/114.sql -sql/upgrades/115.sql -sql/upgrades/116.sql -sql/upgrades/117.sql -sql/upgrades/118.sql -sql/upgrades/119.sql -sql/upgrades/120.sql -sql/upgrades/121.sql -sql/upgrades/122.sql -sql/upgrades/123.sql -sql/upgrades/124.sql -sql/upgrades/125.sql -sql/upgrades/126.sql -sql/upgrades/127.sql -sql/upgrades/128.sql -sql/upgrades/129.sql -sql/upgrades/130.sql -sql/upgrades/131.sql -sql/upgrades/132.sql -sql/upgrades/133.sql -sql/upgrades/134.sql -sql/upgrades/135.sql -sql/upgrades/136.sql -sql/upgrades/137.sql -sql/upgrades/138.sql -sql/upgrades/139.sql -sql/upgrades/140.sql -sql/upgrades/141.sql -sql/upgrades/142.sql -sql/upgrades/143.sql -sql/upgrades/144.sql -sql/upgrades/145.sql -sql/upgrades/146.sql -sql/upgrades/147.sql -sql/upgrades/148.sql -sql/upgrades/149.sql -sql/upgrades/150.sql -sql/upgrades/151.sql -sql/upgrades/152.sql -sql/upgrades/153.sql -sql/upgrades/154.sql -sql/upgrades/155.sql -sql/upgrades/156.sql -sql/upgrades/157.sql -sql/upgrades/158.sql -sql/upgrades/159.sql -sql/upgrades/160.sql -sql/upgrades/161.sql -sql/upgrades/162.sql -sql/upgrades/163.sql -sql/upgrades/164.sql -sql/upgrades/165.sql -sql/upgrades/166.sql -sql/upgrades/167.sql -sql/upgrades/168.sql -sql/upgrades/169.sql -sql/upgrades/170.sql -sql/upgrades/171.sql -sql/upgrades/172.sql -sql/upgrades/173.sql -sql/upgrades/174.sql -sql/upgrades/175.sql -sql/upgrades/176.sql -sql/upgrades/177.sql -sql/upgrades/178.sql -sql/upgrades/179.sql -sql/upgrades/180.sql -sql/upgrades/181.sql -sql/upgrades/182.sql swh/__init__.py swh.storage.egg-info/PKG-INFO swh.storage.egg-info/SOURCES.txt swh.storage.egg-info/dependency_links.txt swh.storage.egg-info/entry_points.txt swh.storage.egg-info/requires.txt swh.storage.egg-info/top_level.txt swh/storage/__init__.py swh/storage/backfill.py swh/storage/cli.py swh/storage/common.py 
swh/storage/exc.py swh/storage/fixer.py swh/storage/in_memory.py swh/storage/interface.py swh/storage/metrics.py swh/storage/migrate_extrinsic_metadata.py swh/storage/objstorage.py swh/storage/py.typed swh/storage/pytest_plugin.py swh/storage/replay.py swh/storage/utils.py swh/storage/writer.py swh/storage/algos/__init__.py swh/storage/algos/diff.py swh/storage/algos/dir_iterators.py swh/storage/algos/origin.py swh/storage/algos/revisions_walker.py swh/storage/algos/snapshot.py swh/storage/api/__init__.py swh/storage/api/client.py swh/storage/api/serializers.py swh/storage/api/server.py swh/storage/cassandra/__init__.py swh/storage/cassandra/common.py swh/storage/cassandra/converters.py swh/storage/cassandra/cql.py swh/storage/cassandra/model.py swh/storage/cassandra/schema.py swh/storage/cassandra/storage.py swh/storage/postgresql/__init__.py swh/storage/postgresql/converters.py swh/storage/postgresql/db.py swh/storage/postgresql/storage.py swh/storage/proxies/buffer.py swh/storage/proxies/counter.py swh/storage/proxies/filter.py swh/storage/proxies/retry.py swh/storage/proxies/tenacious.py swh/storage/proxies/validate.py swh/storage/sql/10-superuser-init.sql swh/storage/sql/15-flavor.sql swh/storage/sql/20-enums.sql swh/storage/sql/30-schema.sql swh/storage/sql/40-funcs.sql swh/storage/sql/60-indexes.sql swh/storage/sql/logical_replication/replication_source.sql +swh/storage/sql/upgrades/015.sql +swh/storage/sql/upgrades/016.sql +swh/storage/sql/upgrades/017.sql +swh/storage/sql/upgrades/018.sql +swh/storage/sql/upgrades/019.sql +swh/storage/sql/upgrades/020.sql +swh/storage/sql/upgrades/021.sql +swh/storage/sql/upgrades/022.sql +swh/storage/sql/upgrades/023.sql +swh/storage/sql/upgrades/024.sql +swh/storage/sql/upgrades/025.sql +swh/storage/sql/upgrades/026.sql +swh/storage/sql/upgrades/027.sql +swh/storage/sql/upgrades/028.sql +swh/storage/sql/upgrades/029.sql +swh/storage/sql/upgrades/030.sql +swh/storage/sql/upgrades/032.sql +swh/storage/sql/upgrades/033.sql +swh/storage/sql/upgrades/034.sql +swh/storage/sql/upgrades/035.sql +swh/storage/sql/upgrades/036.sql +swh/storage/sql/upgrades/037.sql +swh/storage/sql/upgrades/038.sql +swh/storage/sql/upgrades/039.sql +swh/storage/sql/upgrades/040.sql +swh/storage/sql/upgrades/041.sql +swh/storage/sql/upgrades/042.sql +swh/storage/sql/upgrades/043.sql +swh/storage/sql/upgrades/044.sql +swh/storage/sql/upgrades/045.sql +swh/storage/sql/upgrades/046.sql +swh/storage/sql/upgrades/047.sql +swh/storage/sql/upgrades/048.sql +swh/storage/sql/upgrades/049.sql +swh/storage/sql/upgrades/050.sql +swh/storage/sql/upgrades/051.sql +swh/storage/sql/upgrades/052.sql +swh/storage/sql/upgrades/053.sql +swh/storage/sql/upgrades/054.sql +swh/storage/sql/upgrades/055.sql +swh/storage/sql/upgrades/056.sql +swh/storage/sql/upgrades/057.sql +swh/storage/sql/upgrades/058.sql +swh/storage/sql/upgrades/059.sql +swh/storage/sql/upgrades/060.sql +swh/storage/sql/upgrades/061.sql +swh/storage/sql/upgrades/062.sql +swh/storage/sql/upgrades/063.sql +swh/storage/sql/upgrades/064.sql +swh/storage/sql/upgrades/065.sql +swh/storage/sql/upgrades/066.sql +swh/storage/sql/upgrades/067.sql +swh/storage/sql/upgrades/068.sql +swh/storage/sql/upgrades/069.sql +swh/storage/sql/upgrades/070.sql +swh/storage/sql/upgrades/071.sql +swh/storage/sql/upgrades/072.sql +swh/storage/sql/upgrades/073.sql +swh/storage/sql/upgrades/074.sql +swh/storage/sql/upgrades/075.sql +swh/storage/sql/upgrades/076.sql +swh/storage/sql/upgrades/077.sql +swh/storage/sql/upgrades/078.sql 
+swh/storage/sql/upgrades/079.sql +swh/storage/sql/upgrades/080.sql +swh/storage/sql/upgrades/081.sql +swh/storage/sql/upgrades/082.sql +swh/storage/sql/upgrades/083.sql +swh/storage/sql/upgrades/084.sql +swh/storage/sql/upgrades/085.sql +swh/storage/sql/upgrades/086.sql +swh/storage/sql/upgrades/087.sql +swh/storage/sql/upgrades/088.sql +swh/storage/sql/upgrades/089.sql +swh/storage/sql/upgrades/090.sql +swh/storage/sql/upgrades/091.sql +swh/storage/sql/upgrades/092.sql +swh/storage/sql/upgrades/093.sql +swh/storage/sql/upgrades/094.sql +swh/storage/sql/upgrades/095.sql +swh/storage/sql/upgrades/096.sql +swh/storage/sql/upgrades/097.sql +swh/storage/sql/upgrades/098.sql +swh/storage/sql/upgrades/099.sql +swh/storage/sql/upgrades/100.sql +swh/storage/sql/upgrades/101.sql +swh/storage/sql/upgrades/102.sql +swh/storage/sql/upgrades/103.sql +swh/storage/sql/upgrades/104.sql +swh/storage/sql/upgrades/105.sql +swh/storage/sql/upgrades/106.sql +swh/storage/sql/upgrades/107.sql +swh/storage/sql/upgrades/108.sql +swh/storage/sql/upgrades/109.sql +swh/storage/sql/upgrades/110.sql +swh/storage/sql/upgrades/111.sql +swh/storage/sql/upgrades/112.sql +swh/storage/sql/upgrades/113.sql +swh/storage/sql/upgrades/114.sql +swh/storage/sql/upgrades/115.sql +swh/storage/sql/upgrades/116.sql +swh/storage/sql/upgrades/117.sql +swh/storage/sql/upgrades/118.sql +swh/storage/sql/upgrades/119.sql +swh/storage/sql/upgrades/120.sql +swh/storage/sql/upgrades/121.sql +swh/storage/sql/upgrades/122.sql +swh/storage/sql/upgrades/123.sql +swh/storage/sql/upgrades/124.sql +swh/storage/sql/upgrades/125.sql +swh/storage/sql/upgrades/126.sql +swh/storage/sql/upgrades/127.sql +swh/storage/sql/upgrades/128.sql +swh/storage/sql/upgrades/129.sql +swh/storage/sql/upgrades/130.sql +swh/storage/sql/upgrades/131.sql +swh/storage/sql/upgrades/132.sql +swh/storage/sql/upgrades/133.sql +swh/storage/sql/upgrades/134.sql +swh/storage/sql/upgrades/135.sql +swh/storage/sql/upgrades/136.sql +swh/storage/sql/upgrades/137.sql +swh/storage/sql/upgrades/138.sql +swh/storage/sql/upgrades/139.sql +swh/storage/sql/upgrades/140.sql +swh/storage/sql/upgrades/141.sql +swh/storage/sql/upgrades/142.sql +swh/storage/sql/upgrades/143.sql +swh/storage/sql/upgrades/144.sql +swh/storage/sql/upgrades/145.sql +swh/storage/sql/upgrades/146.sql +swh/storage/sql/upgrades/147.sql +swh/storage/sql/upgrades/148.sql +swh/storage/sql/upgrades/149.sql +swh/storage/sql/upgrades/150.sql +swh/storage/sql/upgrades/151.sql +swh/storage/sql/upgrades/152.sql +swh/storage/sql/upgrades/153.sql +swh/storage/sql/upgrades/154.sql +swh/storage/sql/upgrades/155.sql +swh/storage/sql/upgrades/156.sql +swh/storage/sql/upgrades/157.sql +swh/storage/sql/upgrades/158.sql +swh/storage/sql/upgrades/159.sql +swh/storage/sql/upgrades/160.sql +swh/storage/sql/upgrades/161.sql +swh/storage/sql/upgrades/162.sql +swh/storage/sql/upgrades/163.sql +swh/storage/sql/upgrades/164.sql +swh/storage/sql/upgrades/165.sql +swh/storage/sql/upgrades/166.sql +swh/storage/sql/upgrades/167.sql +swh/storage/sql/upgrades/168.sql +swh/storage/sql/upgrades/169.sql +swh/storage/sql/upgrades/170.sql +swh/storage/sql/upgrades/171.sql +swh/storage/sql/upgrades/172.sql +swh/storage/sql/upgrades/173.sql +swh/storage/sql/upgrades/174.sql +swh/storage/sql/upgrades/175.sql +swh/storage/sql/upgrades/176.sql +swh/storage/sql/upgrades/177.sql +swh/storage/sql/upgrades/178.sql +swh/storage/sql/upgrades/179.sql +swh/storage/sql/upgrades/180.sql +swh/storage/sql/upgrades/181.sql +swh/storage/sql/upgrades/182.sql 
swh/storage/tests/__init__.py swh/storage/tests/conftest.py swh/storage/tests/storage_data.py swh/storage/tests/storage_tests.py swh/storage/tests/test_api_client.py swh/storage/tests/test_backfill.py swh/storage/tests/test_buffer.py swh/storage/tests/test_cassandra.py swh/storage/tests/test_cassandra_converters.py swh/storage/tests/test_cassandra_migration.py swh/storage/tests/test_cli.py swh/storage/tests/test_counter.py swh/storage/tests/test_exception.py swh/storage/tests/test_filter.py swh/storage/tests/test_in_memory.py swh/storage/tests/test_init.py swh/storage/tests/test_kafka_writer.py swh/storage/tests/test_metrics.py swh/storage/tests/test_postgresql.py swh/storage/tests/test_postgresql_converters.py swh/storage/tests/test_pytest_plugin.py swh/storage/tests/test_replay.py swh/storage/tests/test_retry.py swh/storage/tests/test_revision_bw_compat.py swh/storage/tests/test_serializers.py swh/storage/tests/test_server.py swh/storage/tests/test_storage_data.py swh/storage/tests/test_tenacious.py swh/storage/tests/test_utils.py swh/storage/tests/test_validate.py swh/storage/tests/algos/__init__.py swh/storage/tests/algos/test_diff.py swh/storage/tests/algos/test_dir_iterator.py swh/storage/tests/algos/test_origin.py swh/storage/tests/algos/test_revisions_walker.py swh/storage/tests/algos/test_snapshot.py swh/storage/tests/data/storage.yml swh/storage/tests/migrate_extrinsic_metadata/test_cran.py swh/storage/tests/migrate_extrinsic_metadata/test_debian.py swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py swh/storage/tests/migrate_extrinsic_metadata/test_npm.py swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py \ No newline at end of file diff --git a/swh.storage.egg-info/entry_points.txt b/swh.storage.egg-info/entry_points.txt index d3369c8e..ae4d3eec 100644 --- a/swh.storage.egg-info/entry_points.txt +++ b/swh.storage.egg-info/entry_points.txt @@ -1,4 +1,2 @@ - - [swh.cli.subcommands] - storage=swh.storage.cli - \ No newline at end of file +[swh.cli.subcommands] +storage = swh.storage.cli diff --git a/swh.storage.egg-info/requires.txt b/swh.storage.egg-info/requires.txt index 75004d1a..262af7e1 100644 --- a/swh.storage.egg-info/requires.txt +++ b/swh.storage.egg-info/requires.txt @@ -1,33 +1,34 @@ aiohttp cassandra-driver!=3.21.0,>=3.19.0 click deprecated flask iso8601 mypy_extensions psycopg2 redis tenacity>=6.2 typing-extensions -swh.core[db,http]>=0.14.0 +swh.core[db,http]>=2 swh.counters>=v0.8.0 swh.model>=4.4.0 swh.objstorage>=0.2.2 [journal] swh.journal>=0.9 [testing] hypothesis>=3.11.0 pytest<7.0.0 pytest-mock swh.model[testing]>=0.0.50 pytz pytest-redis pytest-xdist types-python-dateutil types-pytz types-pyyaml types-redis types-requests +types-toml swh.journal>=0.9 diff --git a/swh/storage/__init__.py b/swh/storage/__init__.py index cd342de8..cdbe7c49 100644 --- a/swh/storage/__init__.py +++ b/swh/storage/__init__.py @@ -1,122 +1,125 @@ -# Copyright (C) 2015-2020 The Software Heritage developers +# Copyright (C) 2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import importlib from typing import TYPE_CHECKING, Any, Dict, List import warnings if TYPE_CHECKING: from .interface import StorageInterface STORAGE_IMPLEMENTATIONS = { "remote": 
".api.client.RemoteStorage", "memory": ".in_memory.InMemoryStorage", "cassandra": ".cassandra.CassandraStorage", "postgresql": ".postgresql.storage.Storage", # deprecated "local": ".postgresql.storage.Storage", # proxy storages "buffer": ".proxies.buffer.BufferingProxyStorage", "counter": ".proxies.counter.CountingProxyStorage", "filter": ".proxies.filter.FilteringProxyStorage", "retry": ".proxies.retry.RetryingProxyStorage", "tenacious": ".proxies.tenacious.TenaciousProxyStorage", "validate": ".proxies.validate.ValidatingProxyStorage", } def get_storage(cls: str, **kwargs) -> "StorageInterface": """Get a storage object of class `storage_class` with arguments `storage_args`. Args: cls (str): storage's class, can be: - ``local`` to use a postgresql database - ``cassandra`` to use a cassandra database - ``remote`` to connect to a swh-storage server - ``memory`` for an in-memory storage, useful for fast tests - ``filter``, ``buffer``, ... to use specific storage "proxies", see their respective documentations args (dict): dictionary with keys Returns: an instance of swh.storage.Storage or compatible class Raises: ValueError if passed an unknown storage class. """ if "args" in kwargs: warnings.warn( 'Explicit "args" key is deprecated, use keys directly instead.', DeprecationWarning, ) kwargs = kwargs["args"] if cls == "pipeline": return get_storage_pipeline(**kwargs) if cls == "local": warnings.warn( 'The "local" storage class is deprecated, use "postgresql" instead.', DeprecationWarning, ) class_path = STORAGE_IMPLEMENTATIONS.get(cls) if class_path is None: raise ValueError( "Unknown storage class `%s`. Supported: %s" % (cls, ", ".join(STORAGE_IMPLEMENTATIONS)) ) (module_path, class_name) = class_path.rsplit(".", 1) module = importlib.import_module(module_path, package=__package__) Storage = getattr(module, class_name) check_config = kwargs.pop("check_config", {}) storage = Storage(**kwargs) if check_config: if not storage.check_config(**check_config): raise EnvironmentError("storage check config failed") return storage +get_datastore = get_storage + + def get_storage_pipeline( steps: List[Dict[str, Any]], check_config=None ) -> "StorageInterface": """Recursively get a storage object that may use other storage objects as backends. Args: steps (List[dict]): List of dicts that may be used as kwargs for `get_storage`. Returns: an instance of swh.storage.Storage or compatible class Raises: ValueError if passed an unknown storage class. 
""" storage_config = None for step in reversed(steps): if "args" in step: warnings.warn( 'Explicit "args" key is deprecated, use keys directly ' "instead.", DeprecationWarning, ) step = { "cls": step["cls"], **step["args"], } if storage_config: step["storage"] = storage_config step["check_config"] = check_config storage_config = step if storage_config is None: raise ValueError("'pipeline' has no steps.") return get_storage(**storage_config) diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py index 40797931..25035684 100644 --- a/swh/storage/postgresql/storage.py +++ b/swh/storage/postgresql/storage.py @@ -1,1625 +1,1630 @@ # Copyright (C) 2015-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 from collections import defaultdict import contextlib from contextlib import contextmanager import datetime import itertools import operator from typing import Any, Counter, Dict, Iterable, List, Optional, Sequence, Tuple import attr import psycopg2 import psycopg2.errors import psycopg2.pool from swh.core.api.serializers import msgpack_dumps, msgpack_loads from swh.core.db.common import db_transaction, db_transaction_generator from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex from swh.model.model import ( SHA1_SIZE, Content, Directory, DirectoryEntry, ExtID, MetadataAuthority, MetadataAuthorityType, MetadataFetcher, Origin, OriginVisit, OriginVisitStatus, RawExtrinsicMetadata, Release, Revision, Sha1, Sha1Git, SkippedContent, Snapshot, SnapshotBranch, TargetType, ) from swh.model.swhids import ExtendedObjectType, ExtendedSWHID, ObjectType from swh.storage.exc import HashCollision, StorageArgumentException, StorageDBError from swh.storage.interface import ( VISIT_STATUSES, ListOrder, PagedResult, PartialBranches, ) from swh.storage.metrics import process_metrics, send_metric, timed from swh.storage.objstorage import ObjStorage from swh.storage.utils import ( extract_collision_hash, get_partition_bounds_bytes, map_optional, now, ) from swh.storage.writer import JournalWriter from . import converters from .db import Db # Max block size of contents to return BULK_BLOCK_CONTENT_LEN_MAX = 10000 EMPTY_SNAPSHOT_ID = hash_to_bytes("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e") """Identifier for the empty snapshot""" VALIDATION_EXCEPTIONS = ( KeyError, TypeError, ValueError, psycopg2.errors.CheckViolation, psycopg2.errors.IntegrityError, psycopg2.errors.InvalidTextRepresentation, psycopg2.errors.NotNullViolation, psycopg2.errors.NumericValueOutOfRange, psycopg2.errors.UndefinedFunction, # (raised on wrong argument typs) ) """Exceptions raised by postgresql when validation of the arguments failed.""" @contextlib.contextmanager def convert_validation_exceptions(): """Catches postgresql errors related to invalid arguments, and re-raises a StorageArgumentException.""" try: yield except psycopg2.errors.UniqueViolation: # This only happens because of concurrent insertions, but it is # a subclass of IntegrityError; so we need to catch and reraise it # before the next clause converts it to StorageArgumentException. 
raise except VALIDATION_EXCEPTIONS as e: raise StorageArgumentException(str(e)) class Storage: """SWH storage proxy, encompassing DB and object storage """ def __init__( self, db, objstorage, min_pool_conns=1, max_pool_conns=10, journal_writer=None ): """ Args: db: either a libpq connection string, or a psycopg2 connection objstorage: configuration of the object storage """ try: if isinstance(db, psycopg2.extensions.connection): self._pool = None self._db = Db(db) # See comment below self._db.cursor().execute("SET TIME ZONE 'UTC'") else: self._pool = psycopg2.pool.ThreadedConnectionPool( min_pool_conns, max_pool_conns, db ) self._db = None except psycopg2.OperationalError as e: raise StorageDBError(e) self.journal_writer = JournalWriter(journal_writer) self.objstorage = ObjStorage(objstorage) def get_db(self): if self._db: return self._db else: db = Db.from_pool(self._pool) # Workaround for psycopg2 < 2.9.0 not handling fractional timezones, # which may happen on old revision/release dates on systems configured # with non-UTC timezones. # https://www.psycopg.org/docs/usage.html#time-zones-handling db.cursor().execute("SET TIME ZONE 'UTC'") return db def put_db(self, db): if db is not self._db: db.put_conn() @contextmanager def db(self): db = None try: db = self.get_db() yield db finally: if db: self.put_db(db) @timed @db_transaction() def check_config(self, *, check_write: bool, db: Db, cur=None) -> bool: if not self.objstorage.check_config(check_write=check_write): return False if not db.check_dbversion(): return False # Check permissions on one of the tables if check_write: check = "INSERT" else: check = "SELECT" cur.execute("select has_table_privilege(current_user, 'content', %s)", (check,)) return cur.fetchone()[0] + @db_transaction() + def get_current_version(self, *, db: Db, cur=None): + """Returns the current code (expected) version""" + return db.current_version + def _content_unique_key(self, hash, db): """Given a hash (tuple or dict), return a unique key from the aggregation of keys. """ keys = db.content_hash_keys if isinstance(hash, tuple): return hash return tuple([hash[k] for k in keys]) def _content_add_metadata(self, db, cur, content): """Add content to the postgresql database but not the object storage. """ # create temporary table for metadata injection db.mktemp("content", cur) db.copy_to( (c.to_dict() for c in content), "tmp_content", db.content_add_keys, cur ) # move metadata in place try: db.content_add_from_temp(cur) except psycopg2.IntegrityError as e: if e.diag.sqlstate == "23505" and e.diag.table_name == "content": message_detail = e.diag.message_detail if message_detail: hash_name, hash_id = extract_collision_hash(message_detail) collision_contents_hashes = [ c.hashes() for c in content if c.get_hash(hash_name) == hash_id ] else: constraint_to_hash_name = { "content_pkey": "sha1", "content_sha1_git_idx": "sha1_git", "content_sha256_idx": "sha256", } hash_name = constraint_to_hash_name.get(e.diag.constraint_name) hash_id = None collision_contents_hashes = None raise HashCollision( hash_name, hash_id, collision_contents_hashes ) from None else: raise @timed @process_metrics def content_add(self, content: List[Content]) -> Dict[str, int]: ctime = now() contents = [attr.evolve(c, ctime=ctime) for c in content] # Must add to the objstorage before the DB and journal. Otherwise: # 1. in case of a crash the DB may "believe" we have the content, but # we didn't have time to write to the objstorage before the crash # 2.
the objstorage mirroring, which reads from the journal, may attempt to # read from the objstorage before we finished writing it objstorage_summary = self.objstorage.content_add(contents) with self.db() as db: with db.transaction() as cur: missing = list( self.content_missing( map(Content.to_dict, contents), key_hash="sha1_git", db=db, cur=cur, ) ) contents = [c for c in contents if c.sha1_git in missing] self.journal_writer.content_add(contents) self._content_add_metadata(db, cur, contents) return { "content:add": len(contents), "content:add:bytes": objstorage_summary["content:add:bytes"], } @timed @db_transaction() def content_update( self, contents: List[Dict[str, Any]], keys: List[str] = [], *, db: Db, cur=None ) -> None: # TODO: Add a check on input keys. How to properly implement # this? We don't know yet the new columns. self.journal_writer.content_update(contents) db.mktemp("content", cur) select_keys = list(set(db.content_get_metadata_keys).union(set(keys))) with convert_validation_exceptions(): db.copy_to(contents, "tmp_content", select_keys, cur) db.content_update_from_temp(keys_to_update=keys, cur=cur) @timed @process_metrics @db_transaction() def content_add_metadata( self, content: List[Content], *, db: Db, cur=None ) -> Dict[str, int]: missing = self.content_missing( (c.to_dict() for c in content), key_hash="sha1_git", db=db, cur=cur, ) contents = [c for c in content if c.sha1_git in missing] self.journal_writer.content_add_metadata(contents) self._content_add_metadata(db, cur, contents) return { "content:add": len(contents), } @timed def content_get_data(self, content: Sha1) -> Optional[bytes]: # FIXME: Make this method support slicing the `data` return self.objstorage.content_get(content) @timed @db_transaction() def content_get_partition( self, partition_id: int, nb_partitions: int, page_token: Optional[str] = None, limit: int = 1000, *, db: Db, cur=None, ) -> PagedResult[Content]: if limit is None: raise StorageArgumentException("limit should not be None") (start, end) = get_partition_bounds_bytes( partition_id, nb_partitions, SHA1_SIZE ) if page_token: start = hash_to_bytes(page_token) if end is None: end = b"\xff" * SHA1_SIZE next_page_token: Optional[str] = None contents = [] for counter, row in enumerate(db.content_get_range(start, end, limit + 1, cur)): row_d = dict(zip(db.content_get_metadata_keys, row)) content = Content(**row_d) if counter >= limit: # take the last content for the next page starting from this next_page_token = hash_to_hex(content.sha1) break contents.append(content) assert len(contents) <= limit return PagedResult(results=contents, next_page_token=next_page_token) @timed @db_transaction(statement_timeout=500) def content_get( self, contents: List[bytes], algo: str = "sha1", *, db: Db, cur=None ) -> List[Optional[Content]]: contents_by_hash: Dict[bytes, Optional[Content]] = {} if algo not in DEFAULT_ALGORITHMS: raise StorageArgumentException( f"algo should be one of {','.join(DEFAULT_ALGORITHMS)}" ) rows = db.content_get_metadata_from_hashes(contents, algo, cur) key = operator.attrgetter(algo) for row in rows: row_d = dict(zip(db.content_get_metadata_keys, row)) content = Content(**row_d) contents_by_hash[key(content)] = content return [contents_by_hash.get(sha1) for sha1 in contents] @timed @db_transaction_generator() def content_missing( self, contents: List[Dict[str, Any]], key_hash: str = "sha1", *, db: Db, cur=None, ) -> Iterable[bytes]: if key_hash not in DEFAULT_ALGORITHMS: raise StorageArgumentException( f"key_hash should be one of 
{','.join(DEFAULT_ALGORITHMS)}" ) keys = db.content_hash_keys key_hash_idx = keys.index(key_hash) for obj in db.content_missing_from_list(contents, cur): yield obj[key_hash_idx] @timed @db_transaction_generator() def content_missing_per_sha1( self, contents: List[bytes], *, db: Db, cur=None ) -> Iterable[bytes]: for obj in db.content_missing_per_sha1(contents, cur): yield obj[0] @timed @db_transaction_generator() def content_missing_per_sha1_git( self, contents: List[bytes], *, db: Db, cur=None ) -> Iterable[Sha1Git]: for obj in db.content_missing_per_sha1_git(contents, cur): yield obj[0] @timed @db_transaction() def content_find( self, content: Dict[str, Any], *, db: Db, cur=None ) -> List[Content]: if not set(content).intersection(DEFAULT_ALGORITHMS): raise StorageArgumentException( "content keys must contain at least one " f"of: {', '.join(sorted(DEFAULT_ALGORITHMS))}" ) rows = db.content_find( sha1=content.get("sha1"), sha1_git=content.get("sha1_git"), sha256=content.get("sha256"), blake2s256=content.get("blake2s256"), cur=cur, ) contents = [] for row in rows: row_d = dict(zip(db.content_find_cols, row)) contents.append(Content(**row_d)) return contents @timed @db_transaction() def content_get_random(self, *, db: Db, cur=None) -> Sha1Git: return db.content_get_random(cur) @staticmethod def _skipped_content_normalize(d): d = d.copy() if d.get("status") is None: d["status"] = "absent" if d.get("length") is None: d["length"] = -1 return d def _skipped_content_add_metadata(self, db, cur, content: List[SkippedContent]): origin_ids = db.origin_id_get_by_url([cont.origin for cont in content], cur=cur) content = [ attr.evolve(c, origin=origin_id) for (c, origin_id) in zip(content, origin_ids) ] db.mktemp("skipped_content", cur) db.copy_to( [c.to_dict() for c in content], "tmp_skipped_content", db.skipped_content_keys, cur, ) # move metadata in place db.skipped_content_add_from_temp(cur) @timed @process_metrics @db_transaction() def skipped_content_add( self, content: List[SkippedContent], *, db: Db, cur=None ) -> Dict[str, int]: ctime = now() content = [attr.evolve(c, ctime=ctime) for c in content] missing_contents = self.skipped_content_missing( (c.to_dict() for c in content), db=db, cur=cur, ) content = [ c for c in content if any( all( c.get_hash(algo) == missing_content.get(algo) for algo in DEFAULT_ALGORITHMS ) for missing_content in missing_contents ) ] self.journal_writer.skipped_content_add(content) self._skipped_content_add_metadata(db, cur, content) return { "skipped_content:add": len(content), } @timed @db_transaction_generator() def skipped_content_missing( self, contents: List[Dict[str, Any]], *, db: Db, cur=None ) -> Iterable[Dict[str, Any]]: contents = list(contents) for content in db.skipped_content_missing(contents, cur): yield dict(zip(db.content_hash_keys, content)) @timed @process_metrics @db_transaction() def directory_add( self, directories: List[Directory], *, db: Db, cur=None ) -> Dict[str, int]: summary = {"directory:add": 0} dirs = set() dir_entries: Dict[str, defaultdict] = { "file": defaultdict(list), "dir": defaultdict(list), "rev": defaultdict(list), } for cur_dir in directories: dir_id = cur_dir.id dirs.add(dir_id) for src_entry in cur_dir.entries: entry = src_entry.to_dict() entry["dir_id"] = dir_id dir_entries[entry["type"]][dir_id].append(entry) dirs_missing = set(self.directory_missing(dirs, db=db, cur=cur)) if not dirs_missing: return summary self.journal_writer.directory_add( dir_ for dir_ in directories if dir_.id in dirs_missing ) # Copy directory 
metadata dirs_missing_dict = ( {"id": dir_.id, "raw_manifest": dir_.raw_manifest} for dir_ in directories if dir_.id in dirs_missing ) db.mktemp("directory", cur) db.copy_to(dirs_missing_dict, "tmp_directory", ["id", "raw_manifest"], cur) # Copy entries for entry_type, entry_list in dir_entries.items(): entries = itertools.chain.from_iterable( entries_for_dir for dir_id, entries_for_dir in entry_list.items() if dir_id in dirs_missing ) db.mktemp_dir_entry(entry_type) db.copy_to( entries, "tmp_directory_entry_%s" % entry_type, ["target", "name", "perms", "dir_id"], cur, ) # Do the final copy db.directory_add_from_temp(cur) summary["directory:add"] = len(dirs_missing) return summary @timed @db_transaction_generator() def directory_missing( self, directories: List[Sha1Git], *, db: Db, cur=None ) -> Iterable[Sha1Git]: for obj in db.directory_missing_from_list(directories, cur): yield obj[0] @timed @db_transaction_generator(statement_timeout=20000) def directory_ls( self, directory: Sha1Git, recursive: bool = False, *, db: Db, cur=None ) -> Iterable[Dict[str, Any]]: if recursive: res_gen = db.directory_walk(directory, cur=cur) else: res_gen = db.directory_walk_one(directory, cur=cur) for line in res_gen: yield dict(zip(db.directory_ls_cols, line)) @timed @db_transaction(statement_timeout=2000) def directory_entry_get_by_path( self, directory: Sha1Git, paths: List[bytes], *, db: Db, cur=None ) -> Optional[Dict[str, Any]]: res = db.directory_entry_get_by_path(directory, paths, cur) return dict(zip(db.directory_ls_cols, res)) if res else None @timed @db_transaction() def directory_get_random(self, *, db: Db, cur=None) -> Sha1Git: return db.directory_get_random(cur) @db_transaction() def directory_get_entries( self, directory_id: Sha1Git, page_token: Optional[bytes] = None, limit: int = 1000, *, db: Db, cur=None, ) -> Optional[PagedResult[DirectoryEntry]]: if list(self.directory_missing([directory_id], db=db, cur=cur)): return None if page_token is not None: raise StorageArgumentException("Unsupported page token") # TODO: actually paginate rows = db.directory_get_entries(directory_id, cur=cur) return PagedResult( results=[ DirectoryEntry(**dict(zip(db.directory_get_entries_cols, row))) for row in rows ], next_page_token=None, ) @timed @db_transaction() def directory_get_raw_manifest( self, directory_ids: List[Sha1Git], *, db: Db, cur=None ) -> Dict[Sha1Git, Optional[bytes]]: return dict(db.directory_get_raw_manifest(directory_ids, cur=cur)) @timed @process_metrics @db_transaction() def revision_add( self, revisions: List[Revision], *, db: Db, cur=None ) -> Dict[str, int]: summary = {"revision:add": 0} revisions_missing = set( self.revision_missing( set(revision.id for revision in revisions), db=db, cur=cur ) ) if not revisions_missing: return summary db.mktemp_revision(cur) revisions_filtered = [ revision for revision in revisions if revision.id in revisions_missing ] self.journal_writer.revision_add(revisions_filtered) db_revisions_filtered = list(map(converters.revision_to_db, revisions_filtered)) parents_filtered: List[Dict[str, Any]] = [] with convert_validation_exceptions(): db.copy_to( db_revisions_filtered, "tmp_revision", db.revision_add_cols, cur, lambda rev: parents_filtered.extend(rev["parents"]), ) db.revision_add_from_temp(cur) db.copy_to( parents_filtered, "revision_history", ["id", "parent_id", "parent_rank"], cur, ) return {"revision:add": len(revisions_missing)} @timed @db_transaction_generator() def revision_missing( self, revisions: List[Sha1Git], *, db: Db, cur=None ) -> 
Iterable[Sha1Git]: if not revisions: return None for obj in db.revision_missing_from_list(revisions, cur): yield obj[0] @timed @db_transaction(statement_timeout=1000) def revision_get( self, revision_ids: List[Sha1Git], ignore_displayname: bool = False, *, db: Db, cur=None, ) -> List[Optional[Revision]]: revisions = [] for line in db.revision_get_from_list(revision_ids, ignore_displayname, cur): revision = converters.db_to_revision(dict(zip(db.revision_get_cols, line))) revisions.append(revision) return revisions @timed @db_transaction_generator(statement_timeout=2000) def revision_log( self, revisions: List[Sha1Git], ignore_displayname: bool = False, limit: Optional[int] = None, *, db: Db, cur=None, ) -> Iterable[Optional[Dict[str, Any]]]: for line in db.revision_log( revisions, ignore_displayname=ignore_displayname, limit=limit, cur=cur ): data = converters.db_to_revision(dict(zip(db.revision_get_cols, line))) if not data: yield None continue yield data.to_dict() @timed @db_transaction_generator(statement_timeout=2000) def revision_shortlog( self, revisions: List[Sha1Git], limit: Optional[int] = None, *, db: Db, cur=None ) -> Iterable[Optional[Tuple[Sha1Git, Tuple[Sha1Git, ...]]]]: yield from db.revision_shortlog(revisions, limit, cur) @timed @db_transaction() def revision_get_random(self, *, db: Db, cur=None) -> Sha1Git: return db.revision_get_random(cur) @timed @db_transaction() def extid_get_from_extid( self, id_type: str, ids: List[bytes], version: Optional[int] = None, *, db: Db, cur=None, ) -> List[ExtID]: extids = [] for row in db.extid_get_from_extid_list(id_type, ids, version=version, cur=cur): if row[0] is not None: extids.append(converters.db_to_extid(dict(zip(db.extid_cols, row)))) return extids @timed @db_transaction() def extid_get_from_target( self, target_type: ObjectType, ids: List[Sha1Git], extid_type: Optional[str] = None, extid_version: Optional[int] = None, *, db: Db, cur=None, ) -> List[ExtID]: extids = [] if (extid_version is not None and extid_type is None) or ( extid_version is None and extid_type is not None ): raise ValueError("You must provide both extid_type and extid_version") for row in db.extid_get_from_swhid_list( target_type.value, ids, extid_version=extid_version, extid_type=extid_type, cur=cur, ): if row[0] is not None: extids.append(converters.db_to_extid(dict(zip(db.extid_cols, row)))) return extids @timed @db_transaction() def extid_add(self, ids: List[ExtID], *, db: Db, cur=None) -> Dict[str, int]: extid = [ { "extid": extid.extid, "extid_type": extid.extid_type, "extid_version": getattr(extid, "extid_version", 0), "target": extid.target.object_id, "target_type": extid.target.object_type.name.lower(), # arghh } for extid in ids ] db.mktemp("extid", cur) self.journal_writer.extid_add(ids) db.copy_to(extid, "tmp_extid", db.extid_cols, cur) # move metadata in place db.extid_add_from_temp(cur) return {"extid:add": len(extid)} @timed @process_metrics @db_transaction() def release_add( self, releases: List[Release], *, db: Db, cur=None ) -> Dict[str, int]: summary = {"release:add": 0} release_ids = set(release.id for release in releases) releases_missing = set(self.release_missing(release_ids, db=db, cur=cur)) if not releases_missing: return summary db.mktemp_release(cur) releases_filtered = [ release for release in releases if release.id in releases_missing ] self.journal_writer.release_add(releases_filtered) db_releases_filtered = list(map(converters.release_to_db, releases_filtered)) with convert_validation_exceptions(): 
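# Bulk-load the converted release rows into the tmp_release temporary
# table created above, then move them into the main release table.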
db.copy_to(db_releases_filtered, "tmp_release", db.release_add_cols, cur) db.release_add_from_temp(cur) return {"release:add": len(releases_missing)} @timed @db_transaction_generator() def release_missing( self, releases: List[Sha1Git], *, db: Db, cur=None ) -> Iterable[Sha1Git]: if not releases: return for obj in db.release_missing_from_list(releases, cur): yield obj[0] @timed @db_transaction(statement_timeout=500) def release_get( self, releases: List[Sha1Git], ignore_displayname: bool = False, *, db: Db, cur=None, ) -> List[Optional[Release]]: rels = [] for release in db.release_get_from_list(releases, ignore_displayname, cur): data = converters.db_to_release(dict(zip(db.release_get_cols, release))) rels.append(data if data else None) return rels @timed @db_transaction() def release_get_random(self, *, db: Db, cur=None) -> Sha1Git: return db.release_get_random(cur) @timed @process_metrics @db_transaction() def snapshot_add( self, snapshots: List[Snapshot], *, db: Db, cur=None ) -> Dict[str, int]: created_temp_table = False count = 0 for snapshot in snapshots: if not db.snapshot_exists(snapshot.id, cur): if not created_temp_table: db.mktemp_snapshot_branch(cur) created_temp_table = True with convert_validation_exceptions(): db.copy_to( ( { "name": name, "target": info.target if info else None, "target_type": ( info.target_type.value if info else None ), } for name, info in snapshot.branches.items() ), "tmp_snapshot_branch", ["name", "target", "target_type"], cur, ) self.journal_writer.snapshot_add([snapshot]) db.snapshot_add(snapshot.id, cur) count += 1 return {"snapshot:add": count} @timed @db_transaction_generator() def snapshot_missing( self, snapshots: List[Sha1Git], *, db: Db, cur=None ) -> Iterable[Sha1Git]: for obj in db.snapshot_missing_from_list(snapshots, cur): yield obj[0] @timed @db_transaction(statement_timeout=2000) def snapshot_get( self, snapshot_id: Sha1Git, *, db: Db, cur=None ) -> Optional[Dict[str, Any]]: d = self.snapshot_get_branches(snapshot_id) if d is None: return d return { "id": d["id"], "branches": { name: branch.to_dict() if branch else None for (name, branch) in d["branches"].items() }, "next_branch": d["next_branch"], } @timed @db_transaction(statement_timeout=2000) def snapshot_count_branches( self, snapshot_id: Sha1Git, branch_name_exclude_prefix: Optional[bytes] = None, *, db: Db, cur=None, ) -> Optional[Dict[Optional[str], int]]: return dict( [ bc for bc in db.snapshot_count_branches( snapshot_id, branch_name_exclude_prefix, cur, ) ] ) @timed @db_transaction(statement_timeout=2000) def snapshot_get_branches( self, snapshot_id: Sha1Git, branches_from: bytes = b"", branches_count: int = 1000, target_types: Optional[List[str]] = None, branch_name_include_substring: Optional[bytes] = None, branch_name_exclude_prefix: Optional[bytes] = None, *, db: Db, cur=None, ) -> Optional[PartialBranches]: if snapshot_id == EMPTY_SNAPSHOT_ID: return PartialBranches(id=snapshot_id, branches={}, next_branch=None,) if list(self.snapshot_missing([snapshot_id])): return None branches = {} next_branch = None fetched_branches = list( db.snapshot_get_by_id( snapshot_id, branches_from=branches_from, # the underlying SQL query can be quite expensive to execute for small # branches_count value, so we ensure a minimum branches limit of 10 for # optimal performances branches_count=max(branches_count + 1, 10), target_types=target_types, branch_name_include_substring=branch_name_include_substring, branch_name_exclude_prefix=branch_name_exclude_prefix, cur=cur, ) ) for row in 
fetched_branches[:branches_count]: branch_d = dict(zip(db.snapshot_get_cols, row)) del branch_d["snapshot_id"] name = branch_d.pop("name") if branch_d["target"] is None and branch_d["target_type"] is None: branch = None else: assert branch_d["target_type"] is not None branch = SnapshotBranch( target=branch_d["target"], target_type=TargetType(branch_d["target_type"]), ) branches[name] = branch if len(fetched_branches) > branches_count: next_branch = dict( zip(db.snapshot_get_cols, fetched_branches[branches_count]) )["name"] return PartialBranches( id=snapshot_id, branches=branches, next_branch=next_branch, ) @timed @db_transaction() def snapshot_get_random(self, *, db: Db, cur=None) -> Sha1Git: return db.snapshot_get_random(cur) @timed @db_transaction() def origin_visit_add( self, visits: List[OriginVisit], *, db: Db, cur=None ) -> Iterable[OriginVisit]: for visit in visits: origin = self.origin_get([visit.origin], db=db, cur=cur)[0] if not origin: # Cannot add a visit without an origin raise StorageArgumentException("Unknown origin %s", visit.origin) all_visits = [] nb_visits = 0 for visit in visits: nb_visits += 1 if not visit.visit: with convert_validation_exceptions(): visit_id = db.origin_visit_add( visit.origin, visit.date, visit.type, cur=cur ) visit = attr.evolve(visit, visit=visit_id) else: db.origin_visit_add_with_id(visit, cur=cur) assert visit.visit is not None all_visits.append(visit) # Forced to write after for the case when the visit has no id self.journal_writer.origin_visit_add([visit]) visit_status = OriginVisitStatus( origin=visit.origin, visit=visit.visit, date=visit.date, type=visit.type, status="created", snapshot=None, ) self._origin_visit_status_add(visit_status, db=db, cur=cur) send_metric("origin_visit:add", count=nb_visits, method_name="origin_visit") return all_visits def _origin_visit_status_add( self, visit_status: OriginVisitStatus, db, cur ) -> None: """Add an origin visit status""" self.journal_writer.origin_visit_status_add([visit_status]) db.origin_visit_status_add(visit_status, cur=cur) @timed @process_metrics @db_transaction() def origin_visit_status_add( self, visit_statuses: List[OriginVisitStatus], *, db: Db, cur=None, ) -> Dict[str, int]: visit_statuses_ = [] # First round to check existence (fail early if any is ko) for visit_status in visit_statuses: origin_url = self.origin_get([visit_status.origin], db=db, cur=cur)[0] if not origin_url: raise StorageArgumentException(f"Unknown origin {visit_status.origin}") if visit_status.type is None: origin_visit = self.origin_visit_get_by( visit_status.origin, visit_status.visit, db=db, cur=cur ) if origin_visit is None: raise StorageArgumentException( f"Unknown origin visit {visit_status.visit} " f"of origin {visit_status.origin}" ) origin_visit_status = attr.evolve(visit_status, type=origin_visit.type) else: origin_visit_status = visit_status visit_statuses_.append(origin_visit_status) for visit_status in visit_statuses_: self._origin_visit_status_add(visit_status, db, cur) return {"origin_visit_status:add": len(visit_statuses_)} @timed @db_transaction() def origin_visit_status_get_latest( self, origin_url: str, visit: int, allowed_statuses: Optional[List[str]] = None, require_snapshot: bool = False, *, db: Db, cur=None, ) -> Optional[OriginVisitStatus]: if allowed_statuses and not set(allowed_statuses).intersection(VISIT_STATUSES): raise StorageArgumentException( f"Unknown allowed statuses {','.join(allowed_statuses)}, only " f"{','.join(VISIT_STATUSES)} authorized" ) row_d = 
db.origin_visit_status_get_latest( origin_url, visit, allowed_statuses, require_snapshot, cur=cur ) if not row_d: return None return OriginVisitStatus(**row_d) @timed @db_transaction(statement_timeout=500) def origin_visit_get( self, origin: str, page_token: Optional[str] = None, order: ListOrder = ListOrder.ASC, limit: int = 10, *, db: Db, cur=None, ) -> PagedResult[OriginVisit]: page_token = page_token or "0" if not isinstance(order, ListOrder): raise StorageArgumentException("order must be a ListOrder value") if not isinstance(page_token, str): raise StorageArgumentException("page_token must be a string.") next_page_token = None visit_from = int(page_token) visits: List[OriginVisit] = [] extra_limit = limit + 1 for row in db.origin_visit_get_range( origin, visit_from=visit_from, order=order, limit=extra_limit, cur=cur ): row_d = dict(zip(db.origin_visit_cols, row)) visits.append( OriginVisit( origin=row_d["origin"], visit=row_d["visit"], date=row_d["date"], type=row_d["type"], ) ) assert len(visits) <= extra_limit if len(visits) == extra_limit: visits = visits[:limit] next_page_token = str(visits[-1].visit) return PagedResult(results=visits, next_page_token=next_page_token) @timed @db_transaction(statement_timeout=500) def origin_visit_find_by_date( self, origin: str, visit_date: datetime.datetime, *, db: Db, cur=None ) -> Optional[OriginVisit]: row_d = db.origin_visit_find_by_date(origin, visit_date, cur=cur) if not row_d: return None return OriginVisit( origin=row_d["origin"], visit=row_d["visit"], date=row_d["date"], type=row_d["type"], ) @timed @db_transaction(statement_timeout=500) def origin_visit_get_by( self, origin: str, visit: int, *, db: Db, cur=None ) -> Optional[OriginVisit]: row = db.origin_visit_get(origin, visit, cur) if row: row_d = dict(zip(db.origin_visit_get_cols, row)) return OriginVisit( origin=row_d["origin"], visit=row_d["visit"], date=row_d["date"], type=row_d["type"], ) return None @timed @db_transaction(statement_timeout=4000) def origin_visit_get_latest( self, origin: str, type: Optional[str] = None, allowed_statuses: Optional[List[str]] = None, require_snapshot: bool = False, *, db: Db, cur=None, ) -> Optional[OriginVisit]: if allowed_statuses and not set(allowed_statuses).intersection(VISIT_STATUSES): raise StorageArgumentException( f"Unknown allowed statuses {','.join(allowed_statuses)}, only " f"{','.join(VISIT_STATUSES)} authorized" ) row = db.origin_visit_get_latest( origin, type=type, allowed_statuses=allowed_statuses, require_snapshot=require_snapshot, cur=cur, ) if row: row_d = dict(zip(db.origin_visit_get_cols, row)) visit = OriginVisit( origin=row_d["origin"], visit=row_d["visit"], date=row_d["date"], type=row_d["type"], ) return visit return None @timed @db_transaction(statement_timeout=500) def origin_visit_status_get( self, origin: str, visit: int, page_token: Optional[str] = None, order: ListOrder = ListOrder.ASC, limit: int = 10, *, db: Db, cur=None, ) -> PagedResult[OriginVisitStatus]: next_page_token = None date_from = None if page_token is not None: date_from = datetime.datetime.fromisoformat(page_token) visit_statuses: List[OriginVisitStatus] = [] # Take one more visit status so we can reuse it as the next page token if any for row in db.origin_visit_status_get_range( origin, visit, date_from=date_from, order=order, limit=limit + 1, cur=cur, ): row_d = dict(zip(db.origin_visit_status_cols, row)) visit_statuses.append(OriginVisitStatus(**row_d)) if len(visit_statuses) > limit: # last visit status date is the next page token next_page_token 
= str(visit_statuses[-1].date) # excluding that visit status from the result to respect the limit size visit_statuses = visit_statuses[:limit] return PagedResult(results=visit_statuses, next_page_token=next_page_token) @timed @db_transaction() def origin_visit_status_get_random( self, type: str, *, db: Db, cur=None ) -> Optional[OriginVisitStatus]: row = db.origin_visit_get_random(type, cur) if row is not None: row_d = dict(zip(db.origin_visit_status_cols, row)) return OriginVisitStatus(**row_d) return None @timed @db_transaction(statement_timeout=2000) def object_find_by_sha1_git( self, ids: List[Sha1Git], *, db: Db, cur=None ) -> Dict[Sha1Git, List[Dict]]: ret: Dict[Sha1Git, List[Dict]] = {id: [] for id in ids} for retval in db.object_find_by_sha1_git(ids, cur=cur): if retval[1]: ret[retval[0]].append( dict(zip(db.object_find_by_sha1_git_cols, retval)) ) return ret @timed @db_transaction(statement_timeout=500) def origin_get( self, origins: List[str], *, db: Db, cur=None ) -> Iterable[Optional[Origin]]: rows = db.origin_get_by_url(origins, cur) result: List[Optional[Origin]] = [] for row in rows: origin_d = dict(zip(db.origin_cols, row)) url = origin_d["url"] result.append(None if url is None else Origin(url=url)) return result @timed @db_transaction(statement_timeout=500) def origin_get_by_sha1( self, sha1s: List[bytes], *, db: Db, cur=None ) -> List[Optional[Dict[str, Any]]]: return [ dict(zip(db.origin_cols, row)) if row[0] else None for row in db.origin_get_by_sha1(sha1s, cur) ] @timed @db_transaction_generator() def origin_get_range(self, origin_from=1, origin_count=100, *, db: Db, cur=None): for origin in db.origin_get_range(origin_from, origin_count, cur): yield dict(zip(db.origin_get_range_cols, origin)) @timed @db_transaction() def origin_list( self, page_token: Optional[str] = None, limit: int = 100, *, db: Db, cur=None ) -> PagedResult[Origin]: page_token = page_token or "0" if not isinstance(page_token, str): raise StorageArgumentException("page_token must be a string.") origin_from = int(page_token) next_page_token = None origins: List[Origin] = [] # Take one more origin so we can reuse it as the next page token if any for row_d in self.origin_get_range(origin_from, limit + 1, db=db, cur=cur): origins.append(Origin(url=row_d["url"])) # keep the last_id for the pagination if needed last_id = row_d["id"] if len(origins) > limit: # data left for subsequent call # last origin id is the next page token next_page_token = str(last_id) # excluding that origin from the result to respect the limit size origins = origins[:limit] assert len(origins) <= limit return PagedResult(results=origins, next_page_token=next_page_token) @timed @db_transaction() def origin_search( self, url_pattern: str, page_token: Optional[str] = None, limit: int = 50, regexp: bool = False, with_visit: bool = False, visit_types: Optional[List[str]] = None, *, db: Db, cur=None, ) -> PagedResult[Origin]: next_page_token = None offset = int(page_token) if page_token else 0 origins = [] # Take one more origin so we can reuse it as the next page token if any for origin in db.origin_search( url_pattern, offset, limit + 1, regexp, with_visit, visit_types, cur ): row_d = dict(zip(db.origin_cols, origin)) origins.append(Origin(url=row_d["url"])) if len(origins) > limit: # next offset next_page_token = str(offset + limit) # excluding that origin from the result to respect the limit size origins = origins[:limit] assert len(origins) <= limit return PagedResult(results=origins, next_page_token=next_page_token) @timed 
@db_transaction() def origin_count( self, url_pattern: str, regexp: bool = False, with_visit: bool = False, *, db: Db, cur=None, ) -> int: return db.origin_count(url_pattern, regexp, with_visit, cur) @timed @db_transaction() def origin_snapshot_get_all( self, origin_url: str, *, db: Db, cur=None ) -> List[Sha1Git]: return list(db.origin_snapshot_get_all(origin_url, cur)) @timed @process_metrics @db_transaction() def origin_add(self, origins: List[Origin], *, db: Db, cur=None) -> Dict[str, int]: urls = [o.url for o in origins] known_origins = set(url for (url,) in db.origin_get_by_url(urls, cur)) # keep only one occurrence of each given origin while keeping the list # sorted as originally given to_add = sorted(set(urls) - known_origins, key=urls.index) self.journal_writer.origin_add([Origin(url=url) for url in to_add]) added = 0 for url in to_add: if db.origin_add(url, cur): added += 1 return {"origin:add": added} @db_transaction(statement_timeout=500) def stat_counters(self, *, db: Db, cur=None): return {k: v for (k, v) in db.stat_counters()} @db_transaction() def refresh_stat_counters(self, *, db: Db, cur=None): keys = [ "content", "directory", "directory_entry_dir", "directory_entry_file", "directory_entry_rev", "origin", "origin_visit", "person", "release", "revision", "revision_history", "skipped_content", "snapshot", ] for key in keys: cur.execute("select * from swh_update_counter(%s)", (key,)) @timed @process_metrics @db_transaction() def raw_extrinsic_metadata_add( self, metadata: List[RawExtrinsicMetadata], db, cur, ) -> Dict[str, int]: metadata = list(metadata) self.journal_writer.raw_extrinsic_metadata_add(metadata) counter = Counter[ExtendedObjectType]() for metadata_entry in metadata: authority_id = self._get_authority_id(metadata_entry.authority, db, cur) fetcher_id = self._get_fetcher_id(metadata_entry.fetcher, db, cur) db.raw_extrinsic_metadata_add( id=metadata_entry.id, type=metadata_entry.target.object_type.name.lower(), target=str(metadata_entry.target), discovery_date=metadata_entry.discovery_date, authority_id=authority_id, fetcher_id=fetcher_id, format=metadata_entry.format, metadata=metadata_entry.metadata, origin=metadata_entry.origin, visit=metadata_entry.visit, snapshot=map_optional(str, metadata_entry.snapshot), release=map_optional(str, metadata_entry.release), revision=map_optional(str, metadata_entry.revision), path=metadata_entry.path, directory=map_optional(str, metadata_entry.directory), cur=cur, ) counter[metadata_entry.target.object_type] += 1 return { f"{type.value}_metadata:add": count for (type, count) in counter.items() } @db_transaction() def raw_extrinsic_metadata_get( self, target: ExtendedSWHID, authority: MetadataAuthority, after: Optional[datetime.datetime] = None, page_token: Optional[bytes] = None, limit: int = 1000, *, db: Db, cur=None, ) -> PagedResult[RawExtrinsicMetadata]: if page_token: (after_time, after_fetcher) = msgpack_loads(base64.b64decode(page_token)) if after and after_time < after: raise StorageArgumentException( "page_token is inconsistent with the value of 'after'." 
    @db_transaction()
    def raw_extrinsic_metadata_get(
        self,
        target: ExtendedSWHID,
        authority: MetadataAuthority,
        after: Optional[datetime.datetime] = None,
        page_token: Optional[bytes] = None,
        limit: int = 1000,
        *,
        db: Db,
        cur=None,
    ) -> PagedResult[RawExtrinsicMetadata]:
        if page_token:
            (after_time, after_fetcher) = msgpack_loads(base64.b64decode(page_token))
            if after and after_time < after:
                raise StorageArgumentException(
                    "page_token is inconsistent with the value of 'after'."
                )
        else:
            after_time = after
            after_fetcher = None

        authority_id = self._get_authority_id(authority, db, cur)
        if not authority_id:
            return PagedResult(next_page_token=None, results=[],)

        rows = db.raw_extrinsic_metadata_get(
            str(target), authority_id, after_time, after_fetcher, limit + 1, cur,
        )
        rows = [dict(zip(db.raw_extrinsic_metadata_get_cols, row)) for row in rows]
        results = []
        for row in rows:
            assert str(target) == row["raw_extrinsic_metadata.target"]
            results.append(converters.db_to_raw_extrinsic_metadata(row))

        if len(results) > limit:
            results.pop()
            assert len(results) == limit
            last_returned_row = rows[-2]  # rows[-1] corresponds to the popped result
            next_page_token: Optional[str] = base64.b64encode(
                msgpack_dumps(
                    (
                        last_returned_row["discovery_date"],
                        last_returned_row["metadata_fetcher.id"],
                    )
                )
            ).decode()
        else:
            next_page_token = None

        return PagedResult(next_page_token=next_page_token, results=results,)

    @db_transaction()
    def raw_extrinsic_metadata_get_by_ids(
        self, ids: List[Sha1Git], *, db: Db, cur=None,
    ) -> List[RawExtrinsicMetadata]:
        return [
            converters.db_to_raw_extrinsic_metadata(
                dict(zip(db.raw_extrinsic_metadata_get_cols, row))
            )
            for row in db.raw_extrinsic_metadata_get_by_ids(ids)
        ]

    @db_transaction()
    def raw_extrinsic_metadata_get_authorities(
        self, target: ExtendedSWHID, *, db: Db, cur=None,
    ) -> List[MetadataAuthority]:
        return [
            MetadataAuthority(
                type=MetadataAuthorityType(authority_type), url=authority_url
            )
            for (
                authority_type,
                authority_url,
            ) in db.raw_extrinsic_metadata_get_authorities(str(target), cur)
        ]

    @timed
    @process_metrics
    @db_transaction()
    def metadata_fetcher_add(
        self, fetchers: List[MetadataFetcher], *, db: Db, cur=None
    ) -> Dict[str, int]:
        fetchers = list(fetchers)
        self.journal_writer.metadata_fetcher_add(fetchers)
        count = 0
        for fetcher in fetchers:
            db.metadata_fetcher_add(fetcher.name, fetcher.version, cur=cur)
            count += 1
        return {"metadata_fetcher:add": count}

    @timed
    @db_transaction(statement_timeout=500)
    def metadata_fetcher_get(
        self, name: str, version: str, *, db: Db, cur=None
    ) -> Optional[MetadataFetcher]:
        row = db.metadata_fetcher_get(name, version, cur=cur)
        if not row:
            return None
        return MetadataFetcher.from_dict(dict(zip(db.metadata_fetcher_cols, row)))

    @timed
    @process_metrics
    @db_transaction()
    def metadata_authority_add(
        self, authorities: List[MetadataAuthority], *, db: Db, cur=None
    ) -> Dict[str, int]:
        authorities = list(authorities)
        self.journal_writer.metadata_authority_add(authorities)
        count = 0
        for authority in authorities:
            db.metadata_authority_add(authority.type.value, authority.url, cur=cur)
            count += 1
        return {"metadata_authority:add": count}

    @timed
    @db_transaction()
    def metadata_authority_get(
        self, type: MetadataAuthorityType, url: str, *, db: Db, cur=None
    ) -> Optional[MetadataAuthority]:
        row = db.metadata_authority_get(type.value, url, cur=cur)
        if not row:
            return None
        return MetadataAuthority.from_dict(dict(zip(db.metadata_authority_cols, row)))

    def clear_buffers(self, object_types: Sequence[str] = ()) -> None:
        """Do nothing"""
        return None

    def flush(self, object_types: Sequence[str] = ()) -> Dict[str, int]:
        return {}

    def _get_authority_id(self, authority: MetadataAuthority, db, cur):
        authority_id = db.metadata_authority_get_id(
            authority.type.value, authority.url, cur
        )
        if not authority_id:
            raise StorageArgumentException(f"Unknown authority {authority}")
        return authority_id

    def _get_fetcher_id(self, fetcher: MetadataFetcher, db, cur):
        fetcher_id = db.metadata_fetcher_get_id(fetcher.name, fetcher.version, cur)
        if not fetcher_id:
            raise StorageArgumentException(f"Unknown fetcher {fetcher}")
        return fetcher_id
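
# Illustrative sketch, not part of the original diff: round-tripping the opaque
# page token used by raw_extrinsic_metadata_get above. The real code relies on
# msgpack_dumps/msgpack_loads from swh.core, which handle datetimes; plain
# msgpack with its datetime options is assumed here as a stand-in.
import base64
import msgpack


def example_encode_page_token(discovery_date, fetcher_id) -> str:
    # pack the (date, fetcher id) pair, then base64-encode, mirroring the
    # storage code; datetime=True requires a timezone-aware datetime
    packed = msgpack.packb((discovery_date, fetcher_id), datetime=True)
    return base64.b64encode(packed).decode()


def example_decode_page_token(token: str):
    # inverse operation; timestamp=3 restores datetime objects, and msgpack
    # returns the pair as a 2-item list
    return msgpack.unpackb(base64.b64decode(token), timestamp=3)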
StorageArgumentException(f"Unknown fetcher {fetcher}") return fetcher_id diff --git a/swh/storage/pytest_plugin.py b/swh/storage/pytest_plugin.py index 604a3d90..26d14ee1 100644 --- a/swh/storage/pytest_plugin.py +++ b/swh/storage/pytest_plugin.py @@ -1,54 +1,64 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from os import environ, path +from functools import partial +from os import environ import pytest +from pytest_postgresql import factories -from swh.core.db.pytest_plugin import postgresql_fact -import swh.storage +from swh.core.db.pytest_plugin import initialize_database_for_module, postgresql_fact from swh.storage import get_storage +from swh.storage.postgresql.db import Db as StorageDb from swh.storage.tests.storage_data import StorageData -SQL_DIR = path.join(path.dirname(swh.storage.__file__), "sql") - environ["LC_ALL"] = "C.UTF-8" -swh_storage_postgresql = postgresql_fact( - "postgresql_proc", dbname="storage", dump_files=path.join(SQL_DIR, "*.sql") +swh_storage_postgresql_proc = factories.postgresql_proc( + dbname="storage", + load=[ + partial( + initialize_database_for_module, + modname="storage", + version=StorageDb.current_version, + ) + ], ) +swh_storage_postgresql = postgresql_fact("swh_storage_postgresql_proc") + + @pytest.fixture def swh_storage_backend_config(swh_storage_postgresql): """Basic pg storage configuration with no journal collaborator (to avoid pulling optional dependency on clients of this fixture) """ yield { "cls": "postgresql", "db": swh_storage_postgresql.dsn, "objstorage": {"cls": "memory"}, "check_config": {"check_write": True}, } @pytest.fixture def swh_storage(swh_storage_backend_config): return get_storage(**swh_storage_backend_config) @pytest.fixture def sample_data() -> StorageData: """Pre-defined sample storage object data to manipulate Returns: StorageData whose attribute keys are data model objects. Either multiple objects: contents, directories, revisions, releases, ... or simple ones: content, directory, revision, release, ... """ return StorageData() diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql index 3708a248..063f9dd1 100644 --- a/swh/storage/sql/30-schema.sql +++ b/swh/storage/sql/30-schema.sql @@ -1,532 +1,517 @@ --- --- SQL implementation of the Software Heritage data model --- --- schema versions -create table dbversion -( - version int primary key, - release timestamptz, - description text -); - -comment on table dbversion is 'Details of current db version'; -comment on column dbversion.version is 'SQL schema version'; -comment on column dbversion.release is 'Version deployment timestamp'; -comment on column dbversion.description is 'Release description'; - --- latest schema version -insert into dbversion(version, release, description) - values(182, now(), 'Work In Progress'); +-- schema versions table (dbversion) is now created by swh.core.db directly -- a SHA1 checksum create domain sha1 as bytea check (length(value) = 20); -- a Git object ID, i.e., a Git-style salted SHA1 checksum create domain sha1_git as bytea check (length(value) = 20); -- a SHA256 checksum create domain sha256 as bytea check (length(value) = 32); -- a blake2 checksum create domain blake2s256 as bytea check (length(value) = 32); -- UNIX path (absolute, relative, individual path component, etc.) 
-- UNIX path (absolute, relative, individual path component, etc.)
create domain unix_path as bytea;

-- a set of UNIX-like access permissions, as manipulated by, e.g., chmod
create domain file_perms as int;

-- an SWHID
create domain swhid as text check (value ~ '^swh:[0-9]+:.*');


-- Checksums about actual file content. Note that the content itself is not
-- stored in the DB, but on external (key-value) storage. A single checksum is
-- used as key there, but the others can be used to verify that we do not
-- unknowingly inject content collisions.
create table content
(
  sha1       sha1 not null,
  sha1_git   sha1_git not null,
  sha256     sha256 not null,
  blake2s256 blake2s256 not null,
  length     bigint not null,
  ctime      timestamptz not null default now(),
             -- creation time, i.e. time of (first) injection into the storage
  status     content_status not null default 'visible',
  object_id  bigserial
);

comment on table content is 'Checksums of file content which is actually stored externally';
comment on column content.sha1 is 'Content sha1 hash';
comment on column content.sha1_git is 'Git object sha1 hash';
comment on column content.sha256 is 'Content Sha256 hash';
comment on column content.blake2s256 is 'Content blake2s hash';
comment on column content.length is 'Content length';
comment on column content.ctime is 'First seen time';
comment on column content.status is 'Content status (absent, visible, hidden)';
comment on column content.object_id is 'Content identifier';


-- An origin is a place, identified by a URL, where software source code
-- artifacts can be found. We support different kinds of origins, e.g., git and
-- other VCS repositories, web pages that list tarball URLs (e.g.,
-- http://www.kernel.org), indirect tarball URLs (e.g.,
-- http://www.example.org/latest.tar.gz), etc. The key feature of an origin is
-- that it can be *fetched* from (wget, git clone, svn checkout, etc.) to
-- retrieve all the contained software.
create table origin
(
  id  bigserial not null,
  url text not null
);

comment on column origin.id is 'Artifact origin id';
comment on column origin.url is 'URL of origin';
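
-- Illustrative note, not part of the original diff: because all four checksums
-- are stored per content, collisions can be cross-checked, e.g.:
--   select sha1 from content group by sha1 having count(distinct sha1_git) > 1;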
-- Content blobs observed somewhere, but not ingested into the archive for
-- whatever reason. This table is separate from the content table as we might
-- not have the sha1 checksum of skipped contents (for instance when we inject
-- git repositories, objects that are too big will be skipped here, and we will
-- only know their sha1_git). 'reason' contains the reason the content was
-- skipped. origin is a nullable column allowing one to find out which origin
-- contains that skipped content.
create table skipped_content
(
  sha1       sha1,
  sha1_git   sha1_git,
  sha256     sha256,
  blake2s256 blake2s256,
  length     bigint not null,
  ctime      timestamptz not null default now(),
  status     content_status not null default 'absent',
  reason     text not null,
  origin     bigint,
  object_id  bigserial
);

comment on table skipped_content is 'Content blobs observed, but not ingested in the archive';
comment on column skipped_content.sha1 is 'Skipped content sha1 hash';
comment on column skipped_content.sha1_git is 'Git object sha1 hash';
comment on column skipped_content.sha256 is 'Skipped content sha256 hash';
comment on column skipped_content.blake2s256 is 'Skipped content blake2s hash';
comment on column skipped_content.length is 'Skipped content length';
comment on column skipped_content.ctime is 'First seen time';
comment on column skipped_content.status is 'Skipped content status (absent, visible, hidden)';
comment on column skipped_content.reason is 'Reason for skipping';
comment on column skipped_content.origin is 'Origin table identifier';
comment on column skipped_content.object_id is 'Skipped content identifier';


-- A file-system directory. A directory is a list of directory entries (see
-- tables: directory_entry_{dir,file}).
--
-- To list the contents of a directory:
-- 1. list the contained directory_entry_dir using array dir_entries
-- 2. list the contained directory_entry_file using array file_entries
-- 3. list the contained directory_entry_rev using array rev_entries
-- 4. UNION
-- (an example query is sketched below, after the directory_entry_dir table)
--
-- Synonyms/mappings:
-- * git: tree
create table directory
(
  id           sha1_git not null,
  dir_entries  bigint[],  -- sub-directories, reference directory_entry_dir
  file_entries bigint[],  -- contained files, reference directory_entry_file
  rev_entries  bigint[],  -- mounted revisions, reference directory_entry_rev
  object_id    bigserial, -- short object identifier
  raw_manifest bytea      -- git manifest of the object, if it cannot be represented using only the other fields
);

comment on table directory is 'Contents of a directory, synonymous to tree (git)';
comment on column directory.id is 'Git object sha1 hash';
comment on column directory.dir_entries is 'Sub-directories, reference directory_entry_dir';
comment on column directory.file_entries is 'Contained files, reference directory_entry_file';
comment on column directory.rev_entries is 'Mounted revisions, reference directory_entry_rev';
comment on column directory.object_id is 'Short object identifier';
comment on column directory.raw_manifest is 'git manifest of the object, if it cannot be represented using only the other fields';

-- A directory entry pointing to a (sub-)directory.
create table directory_entry_dir
(
  id     bigserial,
  target sha1_git not null, -- id of target directory
  name   unix_path not null, -- path name, relative to containing dir
  perms  file_perms not null -- unix-like permissions
);

comment on table directory_entry_dir is 'Directory entry for directory';
comment on column directory_entry_dir.id is 'Directory identifier';
comment on column directory_entry_dir.target is 'Target directory identifier';
comment on column directory_entry_dir.name is 'Path name, relative to containing directory';
comment on column directory_entry_dir.perms is 'Unix-like permissions';
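
-- Illustrative sketch, not part of the original diff: step 1 of the listing
-- recipe above, expressed over the dir_entries array (repeat with
-- file_entries/rev_entries and the matching entry tables, then UNION):
--   select e.name, e.target, e.perms
--     from directory d
--     join directory_entry_dir e on e.id = any(d.dir_entries)
--    where d.id = '\x...'::sha1_git;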
-- A directory entry pointing to a file content.
create table directory_entry_file
(
  id     bigserial,
  target sha1_git not null, -- id of target file
  name   unix_path not null, -- path name, relative to containing dir
  perms  file_perms not null -- unix-like permissions
);

comment on table directory_entry_file is 'Directory entry for file';
comment on column directory_entry_file.id is 'File identifier';
comment on column directory_entry_file.target is 'Target file identifier';
comment on column directory_entry_file.name is 'Path name, relative to containing directory';
comment on column directory_entry_file.perms is 'Unix-like permissions';

-- A directory entry pointing to a revision.
create table directory_entry_rev
(
  id     bigserial,
  target sha1_git not null, -- id of target revision
  name   unix_path not null, -- path name, relative to containing dir
  perms  file_perms not null -- unix-like permissions
);

comment on table directory_entry_rev is 'Directory entry for revision';
comment on column directory_entry_rev.id is 'Revision identifier';
comment on column directory_entry_rev.target is 'Target revision identifier';
comment on column directory_entry_rev.name is 'Path name, relative to containing directory';
comment on column directory_entry_rev.perms is 'Unix-like permissions';


-- A person referenced by some source code artifacts, e.g., a VCS revision or
-- release metadata.
create table person
(
  id          bigserial,
  name        bytea,
  email       bytea,
  fullname    bytea not null,
  displayname bytea
);

comment on table person is 'Person, referenced in Revision author/committer or Release author';
comment on column person.id is 'Internal id';
comment on column person.name is 'Name (advisory, only present if parsed from fullname)';
comment on column person.email is 'Email (advisory, only present if parsed from fullname)';
comment on column person.fullname is 'Full name, usually of the form `Name <email>`, '
                                     'used in integrity computations';
comment on column person.displayname is 'Full name, usually of the form `Name <email>`, '
                                        'used for display queries';
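
-- Illustrative note, not part of the original diff: name and email are
-- best-effort parses of fullname, e.g. fullname 'Jane Doe <jde@example.org>'
-- gives name 'Jane Doe' and email 'jde@example.org'; only fullname takes part
-- in integrity computations.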
-- The state of a source code tree at a specific point in time.
--
-- Synonyms/mappings:
-- * git / subversion / etc: commit
-- * tarball: a specific tarball
--
-- Revisions are organized as DAGs. Each revision points to 0, 1, or more (in
-- case of merges) parent revisions. Each revision points to a directory, i.e.,
-- a file-system tree containing files and directories.
create table revision
(
  id                             sha1_git not null,
  date                           timestamptz,
  date_offset                    smallint,
  committer_date                 timestamptz,
  committer_date_offset          smallint,
  type                           revision_type not null,
  directory                      sha1_git, -- source code 'root' directory
  message                        bytea,
  author                         bigint,
  committer                      bigint,
  synthetic                      boolean not null default false, -- true iff revision has been created by Software Heritage
  metadata                       jsonb, -- extra metadata (tarball checksums, extra commit information, etc...)
  object_id                      bigserial,
  date_neg_utc_offset            boolean,
  committer_date_neg_utc_offset  boolean,
  extra_headers                  bytea[][] not null, -- extra headers (used in hash computation)
  date_offset_bytes              bytea,
  committer_date_offset_bytes    bytea,
  raw_manifest                   bytea -- git manifest of the object, if it cannot be represented using only the other fields
);

comment on table revision is 'A revision represents the state of a source code tree at a specific point in time';
comment on column revision.id is 'Git-style SHA1 commit identifier';
comment on column revision.date is 'Author timestamp as UNIX epoch';
comment on column revision.date_offset is 'Author timestamp timezone, as minute offsets from UTC';
comment on column revision.date_neg_utc_offset is 'True indicates a -0 UTC offset on author timestamp';
comment on column revision.committer_date is 'Committer timestamp as UNIX epoch';
comment on column revision.committer_date_offset is 'Committer timestamp timezone, as minute offsets from UTC';
comment on column revision.committer_date_neg_utc_offset is 'True indicates a -0 UTC offset on committer timestamp';
comment on column revision.type is 'Type of revision';
comment on column revision.directory is 'Directory identifier';
comment on column revision.message is 'Commit message';
comment on column revision.author is 'Author identity';
comment on column revision.committer is 'Committer identity';
comment on column revision.synthetic is 'True iff revision has been synthesized by Software Heritage';
comment on column revision.metadata is 'Extra revision metadata';
comment on column revision.object_id is 'Non-intrinsic, sequential object identifier';
comment on column revision.extra_headers is 'Extra revision headers; used in revision hash computation';
comment on column revision.date_offset_bytes is 'Raw git representation of the timezone, as an offset from UTC. It should follow this format: ``+HHMM`` or ``-HHMM``';
comment on column revision.committer_date_offset_bytes is 'Raw git representation of the timezone, as an offset from UTC. It should follow this format: ``+HHMM`` or ``-HHMM``';
comment on column revision.raw_manifest is 'git manifest of the object, if it cannot be represented using only the other fields';

-- either this table or the sha1_git[] column on the revision table
create table revision_history
(
  id          sha1_git not null,
  parent_id   sha1_git not null,
  parent_rank int not null default 0 -- parent position in merge commits, 0-based
);

comment on table revision_history is 'Sequence of revision history with parent and position in history';
comment on column revision_history.id is 'Revision history git object sha1 checksum';
comment on column revision_history.parent_id is 'Parent revision git object identifier';
comment on column revision_history.parent_rank is 'Parent position in merge commits, 0-based';


-- Crawling history of software origins visited by Software Heritage. Each
-- visit is a 3-way mapping between a software origin, a timestamp, and a
-- snapshot object capturing the full state of the origin at visit time.
create table origin_visit
(
  origin bigint not null,
  visit  bigint not null,
  date   timestamptz not null,
  type   text not null
);

comment on column origin_visit.origin is 'Visited origin';
comment on column origin_visit.visit is 'Sequential visit number for the origin';
comment on column origin_visit.date is 'Visit timestamp';
comment on column origin_visit.type is 'Type of loader that did the visit (hg, git, ...)';
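
-- Illustrative note, not part of the original diff: visit numbers are
-- sequential per origin, so the latest visit of an origin is simply
--   select * from origin_visit where origin = $1 order by visit desc limit 1;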
-- Crawling history of software origin visits by Software Heritage. Each
-- visit sees its history change through new origin visit status updates.
create table origin_visit_status
(
  origin   bigint not null,
  visit    bigint not null,
  date     timestamptz not null,
  type     text not null,
  status   origin_visit_state not null,
  metadata jsonb,
  snapshot sha1_git
);

comment on column origin_visit_status.origin is 'Origin concerned by the visit update';
comment on column origin_visit_status.visit is 'Visit concerned by the visit update';
comment on column origin_visit_status.date is 'Visit update timestamp';
comment on column origin_visit_status.type is 'Type of loader that did the visit (hg, git, ...)';
comment on column origin_visit_status.status is 'Visit status (ongoing, failed, full)';
comment on column origin_visit_status.metadata is 'Optional origin visit metadata';
comment on column origin_visit_status.snapshot is 'Optional, possibly partial, snapshot of the origin visit';


-- A snapshot represents the entire state of a software origin as crawled by
-- Software Heritage. This table is a simple mapping between (public) intrinsic
-- snapshot identifiers and (private) numeric sequential identifiers.
create table snapshot
(
  object_id bigserial not null, -- PK internal object identifier
  id        sha1_git not null   -- snapshot intrinsic identifier
);

comment on table snapshot is 'State of a software origin as crawled by Software Heritage';
comment on column snapshot.object_id is 'Internal object identifier';
comment on column snapshot.id is 'Intrinsic snapshot identifier';

-- Each snapshot associates "branch" names to other objects in the Software
-- Heritage Merkle DAG. This table describes branches as mappings between names
-- and target typed objects.
create table snapshot_branch
(
  object_id   bigserial not null, -- PK internal object identifier
  name        bytea not null,     -- branch name, e.g., "master" or "feature/drag-n-drop"
  target      bytea,              -- target object identifier, e.g., a revision identifier
  target_type snapshot_target     -- target object type, e.g., "revision"
);

comment on table snapshot_branch is 'Associates branches with objects in Heritage Merkle DAG';
comment on column snapshot_branch.object_id is 'Internal object identifier';
comment on column snapshot_branch.name is 'Branch name';
comment on column snapshot_branch.target is 'Target object identifier';
comment on column snapshot_branch.target_type is 'Target object type';

-- Mapping between snapshots and their branches.
create table snapshot_branches
(
  snapshot_id bigint not null, -- snapshot identifier, ref. snapshot.object_id
  branch_id   bigint not null  -- branch identifier, ref. snapshot_branch.object_id
);

comment on table snapshot_branches is 'Mapping between snapshots and their branches';
comment on column snapshot_branches.snapshot_id is 'Snapshot identifier';
comment on column snapshot_branches.branch_id is 'Branch identifier';
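
-- Illustrative sketch, not part of the original diff: resolving the branches
-- of a snapshot from its intrinsic identifier:
--   select b.name, b.target, b.target_type
--     from snapshot s
--     join snapshot_branches sb on sb.snapshot_id = s.object_id
--     join snapshot_branch b on b.object_id = sb.branch_id
--    where s.id = '\x...'::sha1_git;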
-- A "memorable" point in time in the development history of a software
-- project.
--
-- Synonyms/mappings:
-- * git: tag (of the annotated kind, otherwise they are just references)
-- * tarball: the release version number
create table release
(
  id                  sha1_git not null,
  target              sha1_git,
  date                timestamptz,
  date_offset         smallint,
  name                bytea,
  comment             bytea,
  author              bigint,
  synthetic           boolean not null default false, -- true iff release has been created by Software Heritage
  object_id           bigserial,
  target_type         object_type not null,
  date_neg_utc_offset boolean,
  date_offset_bytes   bytea,
  raw_manifest        bytea
);

comment on table release is 'Details of a software release, synonymous with a tag (git) or version number (tarball)';
comment on column release.id is 'Release git identifier';
comment on column release.target is 'Target git identifier';
comment on column release.date is 'Release timestamp';
comment on column release.date_offset is 'Timestamp offset from UTC';
comment on column release.name is 'Name';
comment on column release.comment is 'Comment';
comment on column release.author is 'Author';
comment on column release.synthetic is 'Indicates if created by Software Heritage';
comment on column release.object_id is 'Object identifier';
comment on column release.target_type is 'Object type (''content'', ''directory'', ''revision'', ''release'', ''snapshot'')';
comment on column release.date_neg_utc_offset is 'True indicates -0 UTC offset for release timestamp';
comment on column release.date_offset_bytes is 'Raw git representation of the timezone, as an offset from UTC. It should follow this format: ``+HHMM`` or ``-HHMM``';
comment on column release.raw_manifest is 'git manifest of the object, if it cannot be represented using only the other fields';

-- Tools
create table metadata_fetcher
(
  id      serial not null,
  name    text not null,
  version text not null
);

comment on table metadata_fetcher is 'Tools used to retrieve metadata';
comment on column metadata_fetcher.id is 'Internal identifier of the fetcher';
comment on column metadata_fetcher.name is 'Fetcher name';
comment on column metadata_fetcher.version is 'Fetcher version';

create table metadata_authority
(
  id   serial not null,
  type text not null,
  url  text not null
);

comment on table metadata_authority is 'Metadata authority information';
comment on column metadata_authority.id is 'Internal identifier of the authority';
comment on column metadata_authority.type is 'Type of authority (deposit_client/forge/registry)';
comment on column metadata_authority.url is 'Authority''s URI';
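
-- Illustrative note, not part of the original diff: a fetcher is looked up by
-- (name, version) and an authority by (type, url); e.g., with a hypothetical
-- fetcher name:
--   select id from metadata_fetcher where name = 'swh.loader.git' and version = '1.0';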
-- Extrinsic metadata on DAG objects and origins.
create table raw_extrinsic_metadata
(
  id             sha1_git not null,

  type           text not null,
  target         text not null,

  -- metadata source
  authority_id   bigint not null,
  fetcher_id     bigint not null,
  discovery_date timestamptz not null,

  -- metadata itself
  format         text not null,
  metadata       bytea not null,

  -- context
  origin         text,
  visit          bigint,
  snapshot       swhid,
  release        swhid,
  revision       swhid,
  path           bytea,
  directory      swhid
);

comment on table raw_extrinsic_metadata is 'keeps all metadata found concerning an object';
comment on column raw_extrinsic_metadata.type is 'the type of object (content/directory/revision/release/snapshot/origin) the metadata is on';
comment on column raw_extrinsic_metadata.target is 'the SWHID or origin URL for which the metadata was found';
comment on column raw_extrinsic_metadata.discovery_date is 'the date of retrieval';
comment on column raw_extrinsic_metadata.authority_id is 'the metadata provider: github, openhub, deposit, etc.';
comment on column raw_extrinsic_metadata.fetcher_id is 'the tool used for extracting metadata: loaders, crawlers, etc.';
comment on column raw_extrinsic_metadata.format is 'name of the format of metadata, used by readers to interpret it.';
comment on column raw_extrinsic_metadata.metadata is 'original metadata in opaque format';


-- Keep a cache of object counts
create table object_counts
(
  object_type   text,        -- table for which we're counting objects (PK)
  value         bigint,      -- count of objects in the table
  last_update   timestamptz, -- last update for the object count in this table
  single_update boolean      -- whether we update this table standalone (true) or through bucketed counts (false)
);

comment on table object_counts is 'Cache of object counts';
comment on column object_counts.object_type is 'Object type (''content'', ''directory'', ''revision'', ''release'', ''snapshot'')';
comment on column object_counts.value is 'Count of objects in the table';
comment on column object_counts.last_update is 'Last update for object count';
comment on column object_counts.single_update is 'standalone (true) or bucketed counts (false)';

create table object_counts_bucketed
(
  line         serial not null,    -- PK
  object_type  text not null,      -- table for which we're counting objects
  identifier   text not null,      -- identifier across which we're bucketing objects
  bucket_start bytea,              -- lower bound (inclusive) for the bucket
  bucket_end   bytea,              -- upper bound (exclusive) for the bucket
  value        bigint,             -- count of objects in the bucket
  last_update  timestamptz         -- last update for the object count in this bucket
);

comment on table object_counts_bucketed is 'Bucketed count for objects ordered by type';
comment on column object_counts_bucketed.line is 'Auto incremented identifier value';
comment on column object_counts_bucketed.object_type is 'Object type (''content'', ''directory'', ''revision'', ''release'', ''snapshot'')';
comment on column object_counts_bucketed.identifier is 'Common identifier for bucketed objects';
comment on column object_counts_bucketed.bucket_start is 'Lower bound (inclusive) for the bucket';
comment on column object_counts_bucketed.bucket_end is 'Upper bound (exclusive) for the bucket';
comment on column object_counts_bucketed.value is 'Count of objects in the bucket';
comment on column object_counts_bucketed.last_update is 'Last update for the object count in this bucket';
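
-- Illustrative note, not part of the original diff: totals for bucketed
-- counters are derived by summing their buckets, e.g.:
--   select object_type, sum(value) from object_counts_bucketed group by object_type;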
-- The ExtID (typ. original VCS) <-> swhid relation table
create table extid
(
  extid_type    text not null,
  extid         bytea not null,
  target_type   object_type not null,
  target        sha1_git not null,
  extid_version bigint not null default 0
);

comment on table extid is 'Correspondence SWH object (SWHID) <-> original revision id (vcs id)';
comment on column extid.extid_type is 'ExtID type';
comment on column extid.extid is 'Intrinsic identifier of the object (e.g. hg revision)';
comment on column extid.target_type is 'Type of SWHID of the referenced SWH object';
comment on column extid.target is 'Value (hash) of SWHID of the referenced SWH object';
comment on column extid.extid_version is 'Version of the extid type for the given original object';
diff --git a/sql/upgrades/015.sql b/swh/storage/sql/upgrades/015.sql
similarity index 100%
rename from sql/upgrades/015.sql
rename to swh/storage/sql/upgrades/015.sql
diff --git a/sql/upgrades/016.sql b/swh/storage/sql/upgrades/016.sql
similarity index 100%
rename from sql/upgrades/016.sql
rename to swh/storage/sql/upgrades/016.sql
diff --git a/sql/upgrades/017.sql b/swh/storage/sql/upgrades/017.sql
similarity index 100%
rename from sql/upgrades/017.sql
rename to swh/storage/sql/upgrades/017.sql
diff --git a/sql/upgrades/018.sql b/swh/storage/sql/upgrades/018.sql
similarity index 100%
rename from sql/upgrades/018.sql
rename to swh/storage/sql/upgrades/018.sql
diff --git a/sql/upgrades/019.sql b/swh/storage/sql/upgrades/019.sql
similarity index 100%
rename from sql/upgrades/019.sql
rename to swh/storage/sql/upgrades/019.sql
diff --git a/sql/upgrades/020.sql b/swh/storage/sql/upgrades/020.sql
similarity index 100%
rename from sql/upgrades/020.sql
rename to swh/storage/sql/upgrades/020.sql
diff --git a/sql/upgrades/021.sql b/swh/storage/sql/upgrades/021.sql
similarity index 100%
rename from sql/upgrades/021.sql
rename to swh/storage/sql/upgrades/021.sql
diff --git a/sql/upgrades/022.sql b/swh/storage/sql/upgrades/022.sql
similarity index 100%
rename from sql/upgrades/022.sql
rename to swh/storage/sql/upgrades/022.sql
diff --git a/sql/upgrades/023.sql b/swh/storage/sql/upgrades/023.sql
similarity index 100%
rename from sql/upgrades/023.sql
rename to swh/storage/sql/upgrades/023.sql
diff --git a/sql/upgrades/024.sql b/swh/storage/sql/upgrades/024.sql
similarity index 100%
rename from sql/upgrades/024.sql
rename to swh/storage/sql/upgrades/024.sql
diff --git a/sql/upgrades/025.sql b/swh/storage/sql/upgrades/025.sql
similarity index 100%
rename from sql/upgrades/025.sql
rename to swh/storage/sql/upgrades/025.sql
diff --git a/sql/upgrades/026.sql b/swh/storage/sql/upgrades/026.sql
similarity index 100%
rename from sql/upgrades/026.sql
rename to swh/storage/sql/upgrades/026.sql
diff --git a/sql/upgrades/027.sql b/swh/storage/sql/upgrades/027.sql
similarity index 100%
rename from sql/upgrades/027.sql
rename to swh/storage/sql/upgrades/027.sql
diff --git a/sql/upgrades/028.sql b/swh/storage/sql/upgrades/028.sql
similarity index 100%
rename from sql/upgrades/028.sql
rename to swh/storage/sql/upgrades/028.sql
diff --git a/sql/upgrades/029.sql b/swh/storage/sql/upgrades/029.sql
similarity index 100%
rename from sql/upgrades/029.sql
rename to swh/storage/sql/upgrades/029.sql
diff --git a/sql/upgrades/030.sql b/swh/storage/sql/upgrades/030.sql
similarity index 100%
rename from sql/upgrades/030.sql
rename to swh/storage/sql/upgrades/030.sql
diff --git a/sql/upgrades/032.sql b/swh/storage/sql/upgrades/032.sql
similarity index 100%
rename from sql/upgrades/032.sql
rename to
swh/storage/sql/upgrades/032.sql diff --git a/sql/upgrades/033.sql b/swh/storage/sql/upgrades/033.sql similarity index 100% rename from sql/upgrades/033.sql rename to swh/storage/sql/upgrades/033.sql diff --git a/sql/upgrades/034.sql b/swh/storage/sql/upgrades/034.sql similarity index 100% rename from sql/upgrades/034.sql rename to swh/storage/sql/upgrades/034.sql diff --git a/sql/upgrades/035.sql b/swh/storage/sql/upgrades/035.sql similarity index 100% rename from sql/upgrades/035.sql rename to swh/storage/sql/upgrades/035.sql diff --git a/sql/upgrades/036.sql b/swh/storage/sql/upgrades/036.sql similarity index 100% rename from sql/upgrades/036.sql rename to swh/storage/sql/upgrades/036.sql diff --git a/sql/upgrades/037.sql b/swh/storage/sql/upgrades/037.sql similarity index 100% rename from sql/upgrades/037.sql rename to swh/storage/sql/upgrades/037.sql diff --git a/sql/upgrades/038.sql b/swh/storage/sql/upgrades/038.sql similarity index 100% rename from sql/upgrades/038.sql rename to swh/storage/sql/upgrades/038.sql diff --git a/sql/upgrades/039.sql b/swh/storage/sql/upgrades/039.sql similarity index 100% rename from sql/upgrades/039.sql rename to swh/storage/sql/upgrades/039.sql diff --git a/sql/upgrades/040.sql b/swh/storage/sql/upgrades/040.sql similarity index 100% rename from sql/upgrades/040.sql rename to swh/storage/sql/upgrades/040.sql diff --git a/sql/upgrades/041.sql b/swh/storage/sql/upgrades/041.sql similarity index 100% rename from sql/upgrades/041.sql rename to swh/storage/sql/upgrades/041.sql diff --git a/sql/upgrades/042.sql b/swh/storage/sql/upgrades/042.sql similarity index 100% rename from sql/upgrades/042.sql rename to swh/storage/sql/upgrades/042.sql diff --git a/sql/upgrades/043.sql b/swh/storage/sql/upgrades/043.sql similarity index 100% rename from sql/upgrades/043.sql rename to swh/storage/sql/upgrades/043.sql diff --git a/sql/upgrades/044.sql b/swh/storage/sql/upgrades/044.sql similarity index 100% rename from sql/upgrades/044.sql rename to swh/storage/sql/upgrades/044.sql diff --git a/sql/upgrades/045.sql b/swh/storage/sql/upgrades/045.sql similarity index 100% rename from sql/upgrades/045.sql rename to swh/storage/sql/upgrades/045.sql diff --git a/sql/upgrades/046.sql b/swh/storage/sql/upgrades/046.sql similarity index 100% rename from sql/upgrades/046.sql rename to swh/storage/sql/upgrades/046.sql diff --git a/sql/upgrades/047.sql b/swh/storage/sql/upgrades/047.sql similarity index 100% rename from sql/upgrades/047.sql rename to swh/storage/sql/upgrades/047.sql diff --git a/sql/upgrades/048.sql b/swh/storage/sql/upgrades/048.sql similarity index 100% rename from sql/upgrades/048.sql rename to swh/storage/sql/upgrades/048.sql diff --git a/sql/upgrades/049.sql b/swh/storage/sql/upgrades/049.sql similarity index 100% rename from sql/upgrades/049.sql rename to swh/storage/sql/upgrades/049.sql diff --git a/sql/upgrades/050.sql b/swh/storage/sql/upgrades/050.sql similarity index 100% rename from sql/upgrades/050.sql rename to swh/storage/sql/upgrades/050.sql diff --git a/sql/upgrades/051.sql b/swh/storage/sql/upgrades/051.sql similarity index 100% rename from sql/upgrades/051.sql rename to swh/storage/sql/upgrades/051.sql diff --git a/sql/upgrades/052.sql b/swh/storage/sql/upgrades/052.sql similarity index 100% rename from sql/upgrades/052.sql rename to swh/storage/sql/upgrades/052.sql diff --git a/sql/upgrades/053.sql b/swh/storage/sql/upgrades/053.sql similarity index 100% rename from sql/upgrades/053.sql rename to swh/storage/sql/upgrades/053.sql diff --git 
a/sql/upgrades/054.sql b/swh/storage/sql/upgrades/054.sql similarity index 100% rename from sql/upgrades/054.sql rename to swh/storage/sql/upgrades/054.sql diff --git a/sql/upgrades/055.sql b/swh/storage/sql/upgrades/055.sql similarity index 100% rename from sql/upgrades/055.sql rename to swh/storage/sql/upgrades/055.sql diff --git a/sql/upgrades/056.sql b/swh/storage/sql/upgrades/056.sql similarity index 100% rename from sql/upgrades/056.sql rename to swh/storage/sql/upgrades/056.sql diff --git a/sql/upgrades/057.sql b/swh/storage/sql/upgrades/057.sql similarity index 100% rename from sql/upgrades/057.sql rename to swh/storage/sql/upgrades/057.sql diff --git a/sql/upgrades/058.sql b/swh/storage/sql/upgrades/058.sql similarity index 100% rename from sql/upgrades/058.sql rename to swh/storage/sql/upgrades/058.sql diff --git a/sql/upgrades/059.sql b/swh/storage/sql/upgrades/059.sql similarity index 100% rename from sql/upgrades/059.sql rename to swh/storage/sql/upgrades/059.sql diff --git a/sql/upgrades/060.sql b/swh/storage/sql/upgrades/060.sql similarity index 100% rename from sql/upgrades/060.sql rename to swh/storage/sql/upgrades/060.sql diff --git a/sql/upgrades/061.sql b/swh/storage/sql/upgrades/061.sql similarity index 100% rename from sql/upgrades/061.sql rename to swh/storage/sql/upgrades/061.sql diff --git a/sql/upgrades/062.sql b/swh/storage/sql/upgrades/062.sql similarity index 100% rename from sql/upgrades/062.sql rename to swh/storage/sql/upgrades/062.sql diff --git a/sql/upgrades/063.sql b/swh/storage/sql/upgrades/063.sql similarity index 100% rename from sql/upgrades/063.sql rename to swh/storage/sql/upgrades/063.sql diff --git a/sql/upgrades/064.sql b/swh/storage/sql/upgrades/064.sql similarity index 100% rename from sql/upgrades/064.sql rename to swh/storage/sql/upgrades/064.sql diff --git a/sql/upgrades/065.sql b/swh/storage/sql/upgrades/065.sql similarity index 100% rename from sql/upgrades/065.sql rename to swh/storage/sql/upgrades/065.sql diff --git a/sql/upgrades/066.sql b/swh/storage/sql/upgrades/066.sql similarity index 100% rename from sql/upgrades/066.sql rename to swh/storage/sql/upgrades/066.sql diff --git a/sql/upgrades/067.sql b/swh/storage/sql/upgrades/067.sql similarity index 100% rename from sql/upgrades/067.sql rename to swh/storage/sql/upgrades/067.sql diff --git a/sql/upgrades/068.sql b/swh/storage/sql/upgrades/068.sql similarity index 100% rename from sql/upgrades/068.sql rename to swh/storage/sql/upgrades/068.sql diff --git a/sql/upgrades/069.sql b/swh/storage/sql/upgrades/069.sql similarity index 100% rename from sql/upgrades/069.sql rename to swh/storage/sql/upgrades/069.sql diff --git a/sql/upgrades/070.sql b/swh/storage/sql/upgrades/070.sql similarity index 100% rename from sql/upgrades/070.sql rename to swh/storage/sql/upgrades/070.sql diff --git a/sql/upgrades/071.sql b/swh/storage/sql/upgrades/071.sql similarity index 100% rename from sql/upgrades/071.sql rename to swh/storage/sql/upgrades/071.sql diff --git a/sql/upgrades/072.sql b/swh/storage/sql/upgrades/072.sql similarity index 100% rename from sql/upgrades/072.sql rename to swh/storage/sql/upgrades/072.sql diff --git a/sql/upgrades/073.sql b/swh/storage/sql/upgrades/073.sql similarity index 100% rename from sql/upgrades/073.sql rename to swh/storage/sql/upgrades/073.sql diff --git a/sql/upgrades/074.sql b/swh/storage/sql/upgrades/074.sql similarity index 100% rename from sql/upgrades/074.sql rename to swh/storage/sql/upgrades/074.sql diff --git a/sql/upgrades/075.sql 
b/swh/storage/sql/upgrades/075.sql similarity index 100% rename from sql/upgrades/075.sql rename to swh/storage/sql/upgrades/075.sql diff --git a/sql/upgrades/076.sql b/swh/storage/sql/upgrades/076.sql similarity index 100% rename from sql/upgrades/076.sql rename to swh/storage/sql/upgrades/076.sql diff --git a/sql/upgrades/077.sql b/swh/storage/sql/upgrades/077.sql similarity index 100% rename from sql/upgrades/077.sql rename to swh/storage/sql/upgrades/077.sql diff --git a/sql/upgrades/078.sql b/swh/storage/sql/upgrades/078.sql similarity index 100% rename from sql/upgrades/078.sql rename to swh/storage/sql/upgrades/078.sql diff --git a/sql/upgrades/079.sql b/swh/storage/sql/upgrades/079.sql similarity index 100% rename from sql/upgrades/079.sql rename to swh/storage/sql/upgrades/079.sql diff --git a/sql/upgrades/080.sql b/swh/storage/sql/upgrades/080.sql similarity index 100% rename from sql/upgrades/080.sql rename to swh/storage/sql/upgrades/080.sql diff --git a/sql/upgrades/081.sql b/swh/storage/sql/upgrades/081.sql similarity index 100% rename from sql/upgrades/081.sql rename to swh/storage/sql/upgrades/081.sql diff --git a/sql/upgrades/082.sql b/swh/storage/sql/upgrades/082.sql similarity index 100% rename from sql/upgrades/082.sql rename to swh/storage/sql/upgrades/082.sql diff --git a/sql/upgrades/083.sql b/swh/storage/sql/upgrades/083.sql similarity index 100% rename from sql/upgrades/083.sql rename to swh/storage/sql/upgrades/083.sql diff --git a/sql/upgrades/084.sql b/swh/storage/sql/upgrades/084.sql similarity index 100% rename from sql/upgrades/084.sql rename to swh/storage/sql/upgrades/084.sql diff --git a/sql/upgrades/085.sql b/swh/storage/sql/upgrades/085.sql similarity index 100% rename from sql/upgrades/085.sql rename to swh/storage/sql/upgrades/085.sql diff --git a/sql/upgrades/086.sql b/swh/storage/sql/upgrades/086.sql similarity index 100% rename from sql/upgrades/086.sql rename to swh/storage/sql/upgrades/086.sql diff --git a/sql/upgrades/087.sql b/swh/storage/sql/upgrades/087.sql similarity index 100% rename from sql/upgrades/087.sql rename to swh/storage/sql/upgrades/087.sql diff --git a/sql/upgrades/088.sql b/swh/storage/sql/upgrades/088.sql similarity index 100% rename from sql/upgrades/088.sql rename to swh/storage/sql/upgrades/088.sql diff --git a/sql/upgrades/089.sql b/swh/storage/sql/upgrades/089.sql similarity index 100% rename from sql/upgrades/089.sql rename to swh/storage/sql/upgrades/089.sql diff --git a/sql/upgrades/090.sql b/swh/storage/sql/upgrades/090.sql similarity index 100% rename from sql/upgrades/090.sql rename to swh/storage/sql/upgrades/090.sql diff --git a/sql/upgrades/091.sql b/swh/storage/sql/upgrades/091.sql similarity index 100% rename from sql/upgrades/091.sql rename to swh/storage/sql/upgrades/091.sql diff --git a/sql/upgrades/092.sql b/swh/storage/sql/upgrades/092.sql similarity index 100% rename from sql/upgrades/092.sql rename to swh/storage/sql/upgrades/092.sql diff --git a/sql/upgrades/093.sql b/swh/storage/sql/upgrades/093.sql similarity index 100% rename from sql/upgrades/093.sql rename to swh/storage/sql/upgrades/093.sql diff --git a/sql/upgrades/094.sql b/swh/storage/sql/upgrades/094.sql similarity index 100% rename from sql/upgrades/094.sql rename to swh/storage/sql/upgrades/094.sql diff --git a/sql/upgrades/095.sql b/swh/storage/sql/upgrades/095.sql similarity index 100% rename from sql/upgrades/095.sql rename to swh/storage/sql/upgrades/095.sql diff --git a/sql/upgrades/096.sql b/swh/storage/sql/upgrades/096.sql similarity 
index 100% rename from sql/upgrades/096.sql rename to swh/storage/sql/upgrades/096.sql diff --git a/sql/upgrades/097.sql b/swh/storage/sql/upgrades/097.sql similarity index 100% rename from sql/upgrades/097.sql rename to swh/storage/sql/upgrades/097.sql diff --git a/sql/upgrades/098.sql b/swh/storage/sql/upgrades/098.sql similarity index 100% rename from sql/upgrades/098.sql rename to swh/storage/sql/upgrades/098.sql diff --git a/sql/upgrades/099.sql b/swh/storage/sql/upgrades/099.sql similarity index 100% rename from sql/upgrades/099.sql rename to swh/storage/sql/upgrades/099.sql diff --git a/sql/upgrades/100.sql b/swh/storage/sql/upgrades/100.sql similarity index 100% rename from sql/upgrades/100.sql rename to swh/storage/sql/upgrades/100.sql diff --git a/sql/upgrades/101.sql b/swh/storage/sql/upgrades/101.sql similarity index 100% rename from sql/upgrades/101.sql rename to swh/storage/sql/upgrades/101.sql diff --git a/sql/upgrades/102.sql b/swh/storage/sql/upgrades/102.sql similarity index 100% rename from sql/upgrades/102.sql rename to swh/storage/sql/upgrades/102.sql diff --git a/sql/upgrades/103.sql b/swh/storage/sql/upgrades/103.sql similarity index 100% rename from sql/upgrades/103.sql rename to swh/storage/sql/upgrades/103.sql diff --git a/sql/upgrades/104.sql b/swh/storage/sql/upgrades/104.sql similarity index 100% rename from sql/upgrades/104.sql rename to swh/storage/sql/upgrades/104.sql diff --git a/sql/upgrades/105.sql b/swh/storage/sql/upgrades/105.sql similarity index 100% rename from sql/upgrades/105.sql rename to swh/storage/sql/upgrades/105.sql diff --git a/sql/upgrades/106.sql b/swh/storage/sql/upgrades/106.sql similarity index 100% rename from sql/upgrades/106.sql rename to swh/storage/sql/upgrades/106.sql diff --git a/sql/upgrades/107.sql b/swh/storage/sql/upgrades/107.sql similarity index 100% rename from sql/upgrades/107.sql rename to swh/storage/sql/upgrades/107.sql diff --git a/sql/upgrades/108.sql b/swh/storage/sql/upgrades/108.sql similarity index 100% rename from sql/upgrades/108.sql rename to swh/storage/sql/upgrades/108.sql diff --git a/sql/upgrades/109.sql b/swh/storage/sql/upgrades/109.sql similarity index 100% rename from sql/upgrades/109.sql rename to swh/storage/sql/upgrades/109.sql diff --git a/sql/upgrades/110.sql b/swh/storage/sql/upgrades/110.sql similarity index 100% rename from sql/upgrades/110.sql rename to swh/storage/sql/upgrades/110.sql diff --git a/sql/upgrades/111.sql b/swh/storage/sql/upgrades/111.sql similarity index 100% rename from sql/upgrades/111.sql rename to swh/storage/sql/upgrades/111.sql diff --git a/sql/upgrades/112.sql b/swh/storage/sql/upgrades/112.sql similarity index 100% rename from sql/upgrades/112.sql rename to swh/storage/sql/upgrades/112.sql diff --git a/sql/upgrades/113.sql b/swh/storage/sql/upgrades/113.sql similarity index 100% rename from sql/upgrades/113.sql rename to swh/storage/sql/upgrades/113.sql diff --git a/sql/upgrades/114.sql b/swh/storage/sql/upgrades/114.sql similarity index 100% rename from sql/upgrades/114.sql rename to swh/storage/sql/upgrades/114.sql diff --git a/sql/upgrades/115.sql b/swh/storage/sql/upgrades/115.sql similarity index 100% rename from sql/upgrades/115.sql rename to swh/storage/sql/upgrades/115.sql diff --git a/sql/upgrades/116.sql b/swh/storage/sql/upgrades/116.sql similarity index 100% rename from sql/upgrades/116.sql rename to swh/storage/sql/upgrades/116.sql diff --git a/sql/upgrades/117.sql b/swh/storage/sql/upgrades/117.sql similarity index 100% rename from sql/upgrades/117.sql 
rename to swh/storage/sql/upgrades/117.sql diff --git a/sql/upgrades/118.sql b/swh/storage/sql/upgrades/118.sql similarity index 100% rename from sql/upgrades/118.sql rename to swh/storage/sql/upgrades/118.sql diff --git a/sql/upgrades/119.sql b/swh/storage/sql/upgrades/119.sql similarity index 100% rename from sql/upgrades/119.sql rename to swh/storage/sql/upgrades/119.sql diff --git a/sql/upgrades/120.sql b/swh/storage/sql/upgrades/120.sql similarity index 100% rename from sql/upgrades/120.sql rename to swh/storage/sql/upgrades/120.sql diff --git a/sql/upgrades/121.sql b/swh/storage/sql/upgrades/121.sql similarity index 100% rename from sql/upgrades/121.sql rename to swh/storage/sql/upgrades/121.sql diff --git a/sql/upgrades/122.sql b/swh/storage/sql/upgrades/122.sql similarity index 100% rename from sql/upgrades/122.sql rename to swh/storage/sql/upgrades/122.sql diff --git a/sql/upgrades/123.sql b/swh/storage/sql/upgrades/123.sql similarity index 100% rename from sql/upgrades/123.sql rename to swh/storage/sql/upgrades/123.sql diff --git a/sql/upgrades/124.sql b/swh/storage/sql/upgrades/124.sql similarity index 100% rename from sql/upgrades/124.sql rename to swh/storage/sql/upgrades/124.sql diff --git a/sql/upgrades/125.sql b/swh/storage/sql/upgrades/125.sql similarity index 100% rename from sql/upgrades/125.sql rename to swh/storage/sql/upgrades/125.sql diff --git a/sql/upgrades/126.sql b/swh/storage/sql/upgrades/126.sql similarity index 100% rename from sql/upgrades/126.sql rename to swh/storage/sql/upgrades/126.sql diff --git a/sql/upgrades/127.sql b/swh/storage/sql/upgrades/127.sql similarity index 100% rename from sql/upgrades/127.sql rename to swh/storage/sql/upgrades/127.sql diff --git a/sql/upgrades/128.sql b/swh/storage/sql/upgrades/128.sql similarity index 100% rename from sql/upgrades/128.sql rename to swh/storage/sql/upgrades/128.sql diff --git a/sql/upgrades/129.sql b/swh/storage/sql/upgrades/129.sql similarity index 100% rename from sql/upgrades/129.sql rename to swh/storage/sql/upgrades/129.sql diff --git a/sql/upgrades/130.sql b/swh/storage/sql/upgrades/130.sql similarity index 100% rename from sql/upgrades/130.sql rename to swh/storage/sql/upgrades/130.sql diff --git a/sql/upgrades/131.sql b/swh/storage/sql/upgrades/131.sql similarity index 100% rename from sql/upgrades/131.sql rename to swh/storage/sql/upgrades/131.sql diff --git a/sql/upgrades/132.sql b/swh/storage/sql/upgrades/132.sql similarity index 100% rename from sql/upgrades/132.sql rename to swh/storage/sql/upgrades/132.sql diff --git a/sql/upgrades/133.sql b/swh/storage/sql/upgrades/133.sql similarity index 100% rename from sql/upgrades/133.sql rename to swh/storage/sql/upgrades/133.sql diff --git a/sql/upgrades/134.sql b/swh/storage/sql/upgrades/134.sql similarity index 100% rename from sql/upgrades/134.sql rename to swh/storage/sql/upgrades/134.sql diff --git a/sql/upgrades/135.sql b/swh/storage/sql/upgrades/135.sql similarity index 100% rename from sql/upgrades/135.sql rename to swh/storage/sql/upgrades/135.sql diff --git a/sql/upgrades/136.sql b/swh/storage/sql/upgrades/136.sql similarity index 100% rename from sql/upgrades/136.sql rename to swh/storage/sql/upgrades/136.sql diff --git a/sql/upgrades/137.sql b/swh/storage/sql/upgrades/137.sql similarity index 100% rename from sql/upgrades/137.sql rename to swh/storage/sql/upgrades/137.sql diff --git a/sql/upgrades/138.sql b/swh/storage/sql/upgrades/138.sql similarity index 100% rename from sql/upgrades/138.sql rename to swh/storage/sql/upgrades/138.sql diff 
--git a/sql/upgrades/139.sql b/swh/storage/sql/upgrades/139.sql similarity index 100% rename from sql/upgrades/139.sql rename to swh/storage/sql/upgrades/139.sql diff --git a/sql/upgrades/140.sql b/swh/storage/sql/upgrades/140.sql similarity index 100% rename from sql/upgrades/140.sql rename to swh/storage/sql/upgrades/140.sql diff --git a/sql/upgrades/141.sql b/swh/storage/sql/upgrades/141.sql similarity index 100% rename from sql/upgrades/141.sql rename to swh/storage/sql/upgrades/141.sql diff --git a/sql/upgrades/142.sql b/swh/storage/sql/upgrades/142.sql similarity index 100% rename from sql/upgrades/142.sql rename to swh/storage/sql/upgrades/142.sql diff --git a/sql/upgrades/143.sql b/swh/storage/sql/upgrades/143.sql similarity index 100% rename from sql/upgrades/143.sql rename to swh/storage/sql/upgrades/143.sql diff --git a/sql/upgrades/144.sql b/swh/storage/sql/upgrades/144.sql similarity index 100% rename from sql/upgrades/144.sql rename to swh/storage/sql/upgrades/144.sql diff --git a/sql/upgrades/145.sql b/swh/storage/sql/upgrades/145.sql similarity index 100% rename from sql/upgrades/145.sql rename to swh/storage/sql/upgrades/145.sql diff --git a/sql/upgrades/146.sql b/swh/storage/sql/upgrades/146.sql similarity index 100% rename from sql/upgrades/146.sql rename to swh/storage/sql/upgrades/146.sql diff --git a/sql/upgrades/147.sql b/swh/storage/sql/upgrades/147.sql similarity index 100% rename from sql/upgrades/147.sql rename to swh/storage/sql/upgrades/147.sql diff --git a/sql/upgrades/148.sql b/swh/storage/sql/upgrades/148.sql similarity index 100% rename from sql/upgrades/148.sql rename to swh/storage/sql/upgrades/148.sql diff --git a/sql/upgrades/149.sql b/swh/storage/sql/upgrades/149.sql similarity index 100% rename from sql/upgrades/149.sql rename to swh/storage/sql/upgrades/149.sql diff --git a/sql/upgrades/150.sql b/swh/storage/sql/upgrades/150.sql similarity index 100% rename from sql/upgrades/150.sql rename to swh/storage/sql/upgrades/150.sql diff --git a/sql/upgrades/151.sql b/swh/storage/sql/upgrades/151.sql similarity index 100% rename from sql/upgrades/151.sql rename to swh/storage/sql/upgrades/151.sql diff --git a/sql/upgrades/152.sql b/swh/storage/sql/upgrades/152.sql similarity index 100% rename from sql/upgrades/152.sql rename to swh/storage/sql/upgrades/152.sql diff --git a/sql/upgrades/153.sql b/swh/storage/sql/upgrades/153.sql similarity index 100% rename from sql/upgrades/153.sql rename to swh/storage/sql/upgrades/153.sql diff --git a/sql/upgrades/154.sql b/swh/storage/sql/upgrades/154.sql similarity index 100% rename from sql/upgrades/154.sql rename to swh/storage/sql/upgrades/154.sql diff --git a/sql/upgrades/155.sql b/swh/storage/sql/upgrades/155.sql similarity index 100% rename from sql/upgrades/155.sql rename to swh/storage/sql/upgrades/155.sql diff --git a/sql/upgrades/156.sql b/swh/storage/sql/upgrades/156.sql similarity index 100% rename from sql/upgrades/156.sql rename to swh/storage/sql/upgrades/156.sql diff --git a/sql/upgrades/157.sql b/swh/storage/sql/upgrades/157.sql similarity index 100% rename from sql/upgrades/157.sql rename to swh/storage/sql/upgrades/157.sql diff --git a/sql/upgrades/158.sql b/swh/storage/sql/upgrades/158.sql similarity index 100% rename from sql/upgrades/158.sql rename to swh/storage/sql/upgrades/158.sql diff --git a/sql/upgrades/159.sql b/swh/storage/sql/upgrades/159.sql similarity index 100% rename from sql/upgrades/159.sql rename to swh/storage/sql/upgrades/159.sql diff --git a/sql/upgrades/160.sql 
b/swh/storage/sql/upgrades/160.sql similarity index 100% rename from sql/upgrades/160.sql rename to swh/storage/sql/upgrades/160.sql diff --git a/sql/upgrades/161.sql b/swh/storage/sql/upgrades/161.sql similarity index 100% rename from sql/upgrades/161.sql rename to swh/storage/sql/upgrades/161.sql diff --git a/sql/upgrades/162.sql b/swh/storage/sql/upgrades/162.sql similarity index 100% rename from sql/upgrades/162.sql rename to swh/storage/sql/upgrades/162.sql diff --git a/sql/upgrades/163.sql b/swh/storage/sql/upgrades/163.sql similarity index 100% rename from sql/upgrades/163.sql rename to swh/storage/sql/upgrades/163.sql diff --git a/sql/upgrades/164.sql b/swh/storage/sql/upgrades/164.sql similarity index 100% rename from sql/upgrades/164.sql rename to swh/storage/sql/upgrades/164.sql diff --git a/sql/upgrades/165.sql b/swh/storage/sql/upgrades/165.sql similarity index 100% rename from sql/upgrades/165.sql rename to swh/storage/sql/upgrades/165.sql diff --git a/sql/upgrades/166.sql b/swh/storage/sql/upgrades/166.sql similarity index 100% rename from sql/upgrades/166.sql rename to swh/storage/sql/upgrades/166.sql diff --git a/sql/upgrades/167.sql b/swh/storage/sql/upgrades/167.sql similarity index 100% rename from sql/upgrades/167.sql rename to swh/storage/sql/upgrades/167.sql diff --git a/sql/upgrades/168.sql b/swh/storage/sql/upgrades/168.sql similarity index 100% rename from sql/upgrades/168.sql rename to swh/storage/sql/upgrades/168.sql diff --git a/sql/upgrades/169.sql b/swh/storage/sql/upgrades/169.sql similarity index 100% rename from sql/upgrades/169.sql rename to swh/storage/sql/upgrades/169.sql diff --git a/sql/upgrades/170.sql b/swh/storage/sql/upgrades/170.sql similarity index 100% rename from sql/upgrades/170.sql rename to swh/storage/sql/upgrades/170.sql diff --git a/sql/upgrades/171.sql b/swh/storage/sql/upgrades/171.sql similarity index 100% rename from sql/upgrades/171.sql rename to swh/storage/sql/upgrades/171.sql diff --git a/sql/upgrades/172.sql b/swh/storage/sql/upgrades/172.sql similarity index 100% rename from sql/upgrades/172.sql rename to swh/storage/sql/upgrades/172.sql diff --git a/sql/upgrades/173.sql b/swh/storage/sql/upgrades/173.sql similarity index 100% rename from sql/upgrades/173.sql rename to swh/storage/sql/upgrades/173.sql diff --git a/sql/upgrades/174.sql b/swh/storage/sql/upgrades/174.sql similarity index 100% rename from sql/upgrades/174.sql rename to swh/storage/sql/upgrades/174.sql diff --git a/sql/upgrades/175.sql b/swh/storage/sql/upgrades/175.sql similarity index 100% rename from sql/upgrades/175.sql rename to swh/storage/sql/upgrades/175.sql diff --git a/sql/upgrades/176.sql b/swh/storage/sql/upgrades/176.sql similarity index 100% rename from sql/upgrades/176.sql rename to swh/storage/sql/upgrades/176.sql diff --git a/sql/upgrades/177.sql b/swh/storage/sql/upgrades/177.sql similarity index 100% rename from sql/upgrades/177.sql rename to swh/storage/sql/upgrades/177.sql diff --git a/sql/upgrades/178.sql b/swh/storage/sql/upgrades/178.sql similarity index 100% rename from sql/upgrades/178.sql rename to swh/storage/sql/upgrades/178.sql diff --git a/sql/upgrades/179.sql b/swh/storage/sql/upgrades/179.sql similarity index 100% rename from sql/upgrades/179.sql rename to swh/storage/sql/upgrades/179.sql diff --git a/sql/upgrades/180.sql b/swh/storage/sql/upgrades/180.sql similarity index 100% rename from sql/upgrades/180.sql rename to swh/storage/sql/upgrades/180.sql diff --git a/sql/upgrades/181.sql b/swh/storage/sql/upgrades/181.sql similarity 
index 100% rename from sql/upgrades/181.sql rename to swh/storage/sql/upgrades/181.sql diff --git a/sql/upgrades/182.sql b/swh/storage/sql/upgrades/182.sql similarity index 100% rename from sql/upgrades/182.sql rename to swh/storage/sql/upgrades/182.sql