diff --git a/.gitignore b/.gitignore index 21c4bfbf..31bd6c3d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,15 +1,20 @@ *.pyc *.sw? *~ .coverage .eggs/ __pycache__ *.egg-info/ version.txt /analysis.org /swh/deposit/fixtures/private_data.yaml /swh/deposit.json /test.json /swh/test db.sqlite3 /.noseids +*.tgz +*.zip +*.tar.gz +*.tar.bz2 +*.tar.lzma diff --git a/PKG-INFO b/PKG-INFO index 96b7393e..69b6a6b6 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,11 +1,10 @@ -Metadata-Version: 2.1 +Metadata-Version: 1.0 Name: swh.deposit -Version: 0.0.43 +Version: 0.0.44 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN -Provides-Extra: loader diff --git a/bin/swh-deposit b/bin/swh-deposit new file mode 100755 index 00000000..89366915 --- /dev/null +++ b/bin/swh-deposit @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +# Use: ./swh-deposit --help +# +# Documentation: https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html + +python3 -m swh.deposit.client.cli $@ diff --git a/debian/control b/debian/control index 6503a32f..5bb1ce24 100644 --- a/debian/control +++ b/debian/control @@ -1,46 +1,56 @@ Source: swh-deposit Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python (>= 2), python3-setuptools, python3-all, python3-nose, python3-django-nose, python3-vcversioner, python3-swh.core (>= 0.0.36~), python3-swh.model (>= 0.0.21~), python3-swh.loader.core (>= 0.0.27~), python3-swh.loader.tar (>= 0.0.32~), python3-swh.scheduler (>= 0.0.19~), python3-django, python3-click, python3-vcversioner, python3-djangorestframework, python3-djangorestframework-xml, python3-requests, + python3-lxml, patool Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/source/swh-deposit/ Package: python3-swh.deposit Architecture: all Depends: 
python3-swh.core (>= 0.0.36~), python3-swh.model (>= 0.0.21~), python3-swh.scheduler (>= 0.0.19~), patool, ${misc:Depends}, ${python3:Depends} Description: Software Heritage Deposit Server +Package: python3-swh.deposit.client +Architecture: all +Depends: python3-swh.core (>= 0.0.36~), + python3-swh.model (>= 0.0.21~), + python3-requests, + python3-lxml, + ${misc:Depends}, ${python3:Depends} +Description: Software Heritage Deposit Api Client + Package: python3-swh.deposit.loader Conflict: python3-swh.deposit.injection Architecture: all -Depends: python3-swh.core (>= 0.0.36~), +Depends: python3-swh.deposit.client (= ${binary:Version}), + python3-swh.core (>= 0.0.36~), python3-swh.model (>= 0.0.21~), python3-swh.loader.core (>= 0.0.27~), python3-swh.loader.tar (>= 0.0.32~), python3-swh.scheduler (>= 0.0.19~), - python3-requests, ${misc:Depends}, ${python3:Depends} Description: Software Heritage Deposit Loader diff --git a/debian/rules b/debian/rules index a4600d2a..77f54459 100755 --- a/debian/rules +++ b/debian/rules @@ -1,19 +1,22 @@ #!/usr/bin/make -f export PYBUILD_NAME=swh.deposit export PYBUILD_TEST_ARGS=--with-doctest -sv -a !db,!fs %: dh $@ --with python3 --buildsystem=pybuild override_dh_install: dh_install rm -v $(CURDIR)/debian/python3-*/usr/lib/python*/dist-packages/swh/__init__.py for pyvers in $(shell py3versions -vr); do \ mkdir -p $(CURDIR)/debian/python3-swh.deposit.loader/usr/lib/python$$pyvers/dist-packages/swh/deposit/loader ; \ mv $(CURDIR)/debian/python3-swh.deposit/usr/lib/python$$pyvers/dist-packages/swh/deposit/loader/* \ $(CURDIR)/debian/python3-swh.deposit.loader/usr/lib/python$$pyvers/dist-packages/swh/deposit/loader/ ; \ + mkdir -p $(CURDIR)/debian/python3-swh.deposit.client/usr/lib/python$$pyvers/dist-packages/swh/deposit/client ; \ + mv $(CURDIR)/debian/python3-swh.deposit/usr/lib/python$$pyvers/dist-packages/swh/deposit/client/* \ + $(CURDIR)/debian/python3-swh.deposit.client/usr/lib/python$$pyvers/dist-packages/swh/deposit/client/ ; \ 
done override_dh_auto_test: diff --git a/docs/blueprint.rst b/docs/blueprint.rst new file mode 100644 index 00000000..1fa91cd9 --- /dev/null +++ b/docs/blueprint.rst @@ -0,0 +1,114 @@ +Use cases +--------- + + +Deposit creation +~~~~~~~~~~~~~~~~ + +From client's deposit repository server to SWH's repository server: + +1. The client requests for the server's abilities and its associated collection + (GET query to the *SD/service document uri*) + +2. The server answers the client with the service document which gives the + *collection uri* (also known as *COL/collection IRI*). + +3. The client sends a deposit (optionally a zip archive, some metadata or both) + through the *collection uri*. + + This can be done in: + + * one POST request (metadata + archive). + * one POST request (metadata or archive) + other PUT or POST request to the + *update uris* (*edit-media iri* or *edit iri*) + + 1. Server validates the client's input or returns detailed error if any + + 2. Server stores information received (metadata or software archive source + code or both) + +4. The server notifies the client it acknowledged the client's request. An + ``http 201 Created`` response with a deposit receipt in the body response is + sent back. That deposit receipt will hold the necessary information to + eventually complete the deposit later on if it was incomplete (also known as + status ``partial``). + +Schema representation +^^^^^^^^^^^^^^^^^^^^^ + +.. raw:: html + + + +.. figure:: /images/deposit-create-chart.png + :alt: + + +Updating an existing deposit +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +5. Client updates existing deposit through the *update uris* (one or more POST + or PUT requests to either the *edit-media iri* or *edit iri*). + + 1. Server validates the client's input or returns detailed error if any + + 2. 
Server stores information received (metadata or software archive source + code or both) + + This would be the case for example if the client initially posted a + ``partial`` deposit (e.g. only metadata with no archive, or an archive + without metadata, or a split archive because the initial one exceeded + the limit size imposed by swh repository deposit) + +Schema representation +^^^^^^^^^^^^^^^^^^^^^ + +.. raw:: html + + + +.. figure:: /images/deposit-update-chart.png + :alt: + +Deleting deposit (or associated archive, or associated metadata) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +6. Deposit deletion is possible as long as the deposit is still in ``partial`` + state. + + 1. Server validates the client's input or returns detailed error if any + 2. Server actually deletes information according to the request + +Schema representation +^^^^^^^^^^^^^^^^^^^^^ + +.. raw:: html + + + +.. figure:: /images/deposit-delete-chart.png + :alt: + +Client asks for operation status +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +7. Operation status can be read through a GET query to the *state iri*. + +Server: Triggering deposit checks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Once the status ``deposited`` is reached for a deposit, checks for the +associated archive(s) and metadata will be triggered. If those checks +fail, the status is changed to ``rejected`` and nothing more happens +there. Otherwise, the status is changed to ``verified``. + +Server: Triggering deposit load +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Once the status ``verified`` is reached for a deposit, loading the +deposit with its associated metadata will be triggered. + +The loading will result in a status update, either ``done`` or ``failed`` +(depending on the loading's status). + +This is described in the `loading document <./spec-loading.html>`__. 
diff --git a/docs/dev-info.md b/docs/dev-info.md deleted file mode 100644 index bce40fe7..00000000 --- a/docs/dev-info.md +++ /dev/null @@ -1,162 +0,0 @@ -# Develop on swh-deposit - -There are multiple modes to run and test the server locally: -- development-like (automatic reloading when code changes) -- production-like (no reloading) -- integration tests (no side effects) - -Except for the tests which are mostly side effects free (except for -the database access), the other modes will need some configuration -files (up to 2) to run properly. - -## Database - -swh-deposit uses a database to store the state of a deposit. -The default db is expected to be called swh-deposit-dev. - -To simplify the use, the following makefile targets can be used: - -### schema - -``` Shell -make db-create db-prepare db-migrate -``` - -### data - -Once the db is created, you need some data to be injected (request -types, client, collection, etc...): - -``` Shell -make db-load-data db-load-private-data -``` - -The private data are about having a user (`hal`) with a password -(`hal`) who can access a collection (`hal`). - -Add the following to `../private-data.yaml`: - -``` YAML -- model: deposit.depositclient - fields: - user_ptr_id: 1 - collections: - - 1 -- model: auth.User - pk: 1 - fields: - first_name: hal - last_name: hal - username: hal - password: "pbkdf2_sha256$30000$8lxjoGc9PiBm$DO22vPUJCTM17zYogBgBg5zr/97lH4pw10Mqwh85yUM=" -- model: deposit.depositclient - fields: - user_ptr_id: 1 - collections: - - 1 - url: https://hal.inria.fr - -``` - -### drop - -For information, you can drop the db: - -``` Shell -make db-drop -``` - -## Development-like environment - -Development-like environment needs one configuration file to work -properly. 
- -### Configuration - -**`{/etc/softwareheritage | ~/.config/swh | ~/.swh}`/deposit/server.yml**: - -``` YAML -# dev option for running the server locally -host: 127.0.0.1 -port: 5006 - -# production -authentication: - activated: true - white-list: - GET: - - / - -# 20 Mib max size -max_upload_size: 20971520 - -``` - -### Run - -Run the local server, using the default configuration file: - -``` Shell -make run-dev -``` - -## Production-like environment - -Production-like environment needs two configuration files to work -properly. - -This is more close to what's actually running in production. - -### Configuration - -This expects the same file describes in the previous chapter. Plus, -an additional private **settings.yml** file containing secret -information that is not in the source code repository. - -**`{/etc/softwareheritage | ~/.config/swh | ~/.swh}`/deposit/private.yml**: - -``` YAML -secret_key: production-local -db: - name: swh-deposit-dev -``` - -A production configuration file would look like: - -``` YAML -secret_key: production-secret-key -db: - name: swh-deposit-dev - host: db - port: 5467 - user: user - password: user-password -``` - -### Run - -``` Shell -make run -``` - -Note: This expects gunicorn3 package installed on the system - -## Tests - -To run the tests: -``` Shell -make test -``` - -As explained, those tests are mostly side-effect free. The db part is -dealt with by django. The remaining part which patches those -side-effect behavior is dealt with in the -`swh/deposit/tests/__init__.py` module. 
- -## Sum up - -Prepare everything for your user to run: - -``` Shell -make db-drop db-create db-prepare db-migrate db-load-private-data run-dev -``` diff --git a/docs/dev-info.rst b/docs/dev-info.rst new file mode 100644 index 00000000..6e0a02bc --- /dev/null +++ b/docs/dev-info.rst @@ -0,0 +1,174 @@ +Hacking on swh-deposit +====================== + +There are multiple modes to run and test the server locally: + +* development-like (automatic reloading when code changes) +* production-like (no reloading) +* integration tests (no side effects) + +Except for the tests which are mostly side effects free (except for the +database access), the other modes will need some configuration files (up to 2) +to run properly. + +Database +-------- + +swh-deposit uses a database to store the state of a deposit. The default +db is expected to be called swh-deposit-dev. + +To simplify the use, the following makefile targets can be used: + +schema +~~~~~~ + +.. code:: shell + + make db-create db-prepare db-migrate + +data +~~~~ + +Once the db is created, you need some data to be injected (request +types, client, collection, etc...): + +.. code:: shell + + make db-load-data db-load-private-data + +The private data are about having a user (``hal``) with a password +(``hal``) who can access a collection (``hal``). + +Add the following to ``../private-data.yaml``: + +.. code:: yaml + + - model: deposit.depositclient + fields: + user_ptr_id: 1 + collections: + - 1 + - model: auth.User + pk: 1 + fields: + first_name: hal + last_name: hal + username: hal + password: "pbkdf2_sha256$30000$8lxjoGc9PiBm$DO22vPUJCTM17zYogBgBg5zr/97lH4pw10Mqwh85yUM=" + - model: deposit.depositclient + fields: + user_ptr_id: 1 + collections: + - 1 + url: https://hal.inria.fr + +drop +~~~~ + +For information, you can drop the db: + +.. code:: shell + + make db-drop + +Development-like environment +---------------------------- + +Development-like environment needs one configuration file to work +properly. 
+ +Configuration +~~~~~~~~~~~~~ + +**``{/etc/softwareheritage | ~/.config/swh | ~/.swh}``/deposit/server.yml**: + +.. code:: yaml + + # dev option for running the server locally + host: 127.0.0.1 + port: 5006 + + # production + authentication: + activated: true + white-list: + GET: + - / + + # 20 MiB max size + max_upload_size: 20971520 + +Run +~~~ + +Run the local server, using the default configuration file: + +.. code:: shell + + make run-dev + +Production-like environment +--------------------------- + +Production-like environment needs two configuration files to work +properly. + +This is closer to what's actually running in production. + +Configuration +~~~~~~~~~~~~~ + +This expects the same file described in the previous chapter. Plus, an +additional private **settings.yml** file containing secret information +that is not in the source code repository. + +**``{/etc/softwareheritage | ~/.config/swh | ~/.swh}``/deposit/private.yml**: + +.. code:: yaml + + secret_key: production-local + db: + name: swh-deposit-dev + +A production configuration file would look like: + +.. code:: yaml + + secret_key: production-secret-key + db: + name: swh-deposit-dev + host: db + port: 5467 + user: user + password: user-password + +Run +~~~ + +.. code:: shell + + make run + +Note: This expects the gunicorn3 package to be installed on the system + +Tests +----- + +To run the tests: + +.. code:: shell + + make test + +As explained, those tests are mostly side-effect free. The db part is +dealt with by django. The remaining part which patches those side-effect +behavior is dealt with in the ``swh/deposit/tests/__init__.py`` module. + +Sum up +------ + +Prepare everything for your user to run: + +.. 
code:: shell + + make db-drop db-create db-prepare db-migrate db-load-private-data run-dev diff --git a/docs/getting-started.md b/docs/getting-started.md deleted file mode 100644 index 83a1435a..00000000 --- a/docs/getting-started.md +++ /dev/null @@ -1,333 +0,0 @@ -# Getting Started - -This is a getting started to demonstrate the deposit api use case with -a shell client. - -The api is rooted at https://deposit.softwareheritage.org. - -For more details, see the [main documentation](./index.html). - -## Requirements - -You need to be referenced on SWH's client list to have: -- a credential (needed for the basic authentication step). -- an associated collection - -[Contact us for more information.](https://www.softwareheritage.org/contact/) - -## Demonstration - -For the rest of the document, we will: -- reference `` as the client and `` as its -associated authentication password. -- use curl as example on how to request the api. -- present the main deposit use cases. - -The use cases are: - -- one single deposit step: The user posts in one query (one deposit) a - software source code archive and associated metadata (deposit is - finalized with status `deposited`). - - This will demonstrate the multipart query. - -- another 3-steps deposit (which can be extended as more than 2 - steps): - 1. Create an incomplete deposit (status `partial`) - 2. Update a deposit (and finalize it, so the status becomes - `deposited`) - 3. Check the deposit's state - - This will demonstrate the stateful nature of the sword protocol. - -Those use cases share a common part, they must start by requesting the -`service document iri` (internationalized resource identifier) for -information about the collection's location. - -### Common part - Start with the service document - -First, to determine the *collection iri* onto which deposit data, the -client needs to ask the server where is its *collection* located. That -is the role of the *service document iri*. 
- -For example: - -``` Shell -curl -i --user : https://deposit.softwareheritage.org/1/servicedocument/ -``` - -If everything went well, you should have received a response similar -to this: - -``` Shell -HTTP/1.0 200 OK -Server: WSGIServer/0.2 CPython/3.5.3 -Content-Type: application/xml - - - - - 2.0 - 209715200 - - - The Software Heritage (SWH) Archive - - Software Collection - application/zip - application/x-tar - Collection Policy - Software Heritage Archive - Collect, Preserve, Share - false - http://purl.org/net/sword/package/SimpleZip - https://deposit.softwareheritage.org/1// - - - -``` - -Explaining the response: -- `HTTP/1.0 200 OK`: the query is successful and returns a body response -- `Content-Type: application/xml`: The body response is in xml format -- `body response`: it is a service document describing that the client - `` has a collection named ``. That - collection is available at the *collection iri* - `/1//` (through POST query). - -At this level, if something went wrong, this should be authentication related. -So the response would have been a 401 Unauthorized access. -Something like: - -``` Shell -curl -i https://deposit.softwareheritage.org/1// -HTTP/1.0 401 Unauthorized -Server: WSGIServer/0.2 CPython/3.5.3 -Content-Type: application/xml -WWW-Authenticate: Basic realm="" -X-Frame-Options: SAMEORIGIN - - - - Access to this api needs authentication - processing failed - - -``` - -### Single deposit - -A single deposit translates to a multipart deposit request. - -This means, in swh's deposit's terms, sending exactly one POST query -with: -- 1 archive (content-type `application/zip` or `application/x-tar`) -- 1 atom xml content (`content-type: application/atom+xml;type=entry`) - -The supported archive, for now are limited to zip files. Those -archives are expected to contain some form of software source -code. The atom entry content is some xml defining metadata about that -software. 
- -Example of minimal atom entry file: - -``` XML - - - Title - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2005-10-07T17:17:08Z - Contributor - The abstract - - - The abstract - Access Rights - Alternative Title - Date Available - Bibliographic Citation - Contributor - Description - Has Part - Has Version - Identifier - Is Part Of - Publisher - References - Rights Holder - Source - Title - Type - -``` - -Once the files are ready for deposit, we want to do the actual deposit -in one shot. - -For this, we need to provide: -- the contents and their associated correct content-types -- either the header `In-Progress` to false (meaning, it's finished -after this query) or nothing (the server will assume it's not in -progress if not present). -- Optionally, the `Slug` header, which is a reference to a unique -identifier the client knows about and wants to provide us. - -You can do this with the following command: - -``` Shell -curl -i --user : \ - -F "file=@deposit.zip;type=application/zip;filename=payload" \ - -F "atom=@atom-entry.xml;type=application/atom+xml;charset=UTF-8" \ - -H 'In-Progress: false' \ - -H 'Slug: some-external-id' \ - -XPOST https://deposit.softwareheritage.org/1// -``` - -You just posted a deposit to the collection -https://deposit.softwareheritage.org/1//. - -If everything went well, you should have received a response similar -to this: - -``` Shell -HTTP/1.0 201 Created -Server: WSGIServer/0.2 CPython/3.5.3 -Location: /1//10/metadata/ -Content-Type: application/xml - - - 9 - Sept. 26, 2017, 10:11 a.m. - payload - deposited - - - - - - - - - - - http://purl.org/net/sword/package/SimpleZip - -``` - -Explaining this response: -- `HTTP/1.0 201 Created`: the deposit is successful -- `Location: /1//10/metadata/`: the EDIT-SE-IRI through which we can - update a deposit -- body response: it is a deposit receipt detailing all endpoints - available to manipulate the deposit (update, replace, delete, - etc...) 
It also explains the deposit identifier to be 9 (which is - useful for the remaining example). - -Note: As the deposit is in `deposited` status, you cannot actually -update anything after this query. Well, the client can try, but it -will be answered with a 403 forbidden answer. - -### Multi-steps deposit - -#### Create a deposit - -We will use the collection IRI again as the starting point. - -We need to explicitely give to the server information about: -- the deposit's completeness (through header `In-Progress` to true, as - we want to do in multiple steps now). -- archive's md5 hash (through header `Content-MD5`) -- upload's type (through the headers `Content-Disposition` and - `Content-Type`) - -The following command: - -``` Shell -curl -i --user : \ - --data-binary @swh/deposit.tar.gz \ - -H 'In-Progress: true' \ - -H 'Content-MD5: 0faa1ecbf9224b9bf48a7c691b8c2b6f' \ - -H 'Content-Disposition: attachment; filename=[deposit.tar.gz]' \ - -H 'Slug: some-external-id' \ - -H 'Packaging: http://purl.org/net/sword/package/SimpleZIP' \ - -H 'Content-type: application/zip' \ - -XPOST https://deposit.softwareheritage.org/1// -``` - -The expected answer is the same as the previous sample. - -#### Update deposit's metadata - -To update a deposit, we can either add some more archives, some more -metadata or replace existing ones. - -As we don't have defined metadata yet (except for the `slug` header), -we can add some to the `EDIT-SE-IRI` endpoint (/1//10/metadata/). -That information is extracted from the deposit receipt sample. - -Using here the same atom-entry.xml file presented in previous chapter. 
- -For example, here is the command to update deposit metadata: - -``` Shell -curl -i --user : --data-binary @atom-entry.xml \ --H 'In-Progress: true' \ --H 'Slug: some-external-id' \ --H 'Content-Type: application/atom+xml;type=entry' \ --XPOST https://deposit.softwareheritage.org/1//10/metadata/ -HTTP/1.0 201 Created -Server: WSGIServer/0.2 CPython/3.5.3 -Location: /1//10/metadata/ -Content-Type: application/xml - - - 10 - Sept. 26, 2017, 10:32 a.m. - None - partial - - - - - - - - - - - http://purl.org/net/sword/package/SimpleZip - -``` - -#### Check the deposit's state - -You need to check the STATE-IRI endpoint (/1//10/status/). - -``` Shell -curl -i --user : https://deposit.softwareheritage.org/1//10/status/ -HTTP/1.0 200 OK -Date: Wed, 27 Sep 2017 08:25:53 GMT -Content-Type: application/xml -``` - -Response: - -``` XML - - 9 - deposited - deposit is fully received and ready for loading - - -``` diff --git a/docs/getting-started.rst b/docs/getting-started.rst new file mode 100644 index 00000000..e6c5ecb5 --- /dev/null +++ b/docs/getting-started.rst @@ -0,0 +1,291 @@ +Getting Started +=============== + +This is a guide for how to prepare and push a software deposit with +the swh-deposit commands. + +The api is rooted at https://deposit.softwareheritage.org. + +For more details, see the `main documentation <./index.html>`__. + +Requirements +------------ + +You need to be referenced on SWH's client list to have: + +* credentials (needed for the basic authentication step) + + - in this document we reference ``<name>`` as the client's name and + ``<pass>`` as its associated authentication password. + + * an associated collection + + +`Contact us for more +information. <https://www.softwareheritage.org/contact/>`__ + +Prepare a deposit +----------------- +* compress the files in a supported archive format: + + - zip: common zip archive (no multi-disk zip files). 
+ - tar: tar archive without compression or optionally one of the + following compression algorithms: gzip (.tar.gz, .tgz), bzip2 + (.tar.bz2), or lzma (.tar.lzma) +* prepare a metadata file (`more details <./metadata.html>`__): + + - specify metadata schema/vocabulary (CodeMeta is recommended) + - specify *MUST* metadata (url, authors, software name and + the external\_identifier) + - add all available information under the compatible metadata term + + An example of an atom entry file with CodeMeta terms: + +.. code:: xml + + + + Je suis GPL + 12345 + forge.softwareheritage.org/source/jesuisgpl/ + Yes, this is another implementation of + "Hello, world!" when you run it. + + GPL + https://www.gnu.org/licenses/gpl.html + + + Reuben Thomas + Maintainer + + + Sami Kerola + Maintainer + + + + +Push deposit +------------ +You can push a deposit with: + +* a single deposit (archive + metadata): + + The user posts in one query a software + source code archive and associated metadata. + The deposit is directly marked with status ``deposited``. +* a multisteps deposit: + + 1. Create an incomplete deposit (marked with status ``partial``) + 2. Add data to a deposit (in multiple requests if needed) + 3. Finalize deposit (the status becomes ``deposited``) + + +Single deposit +^^^^^^^^^^^^^^ + + +Once the files are ready for deposit, we want to do the actual deposit +in one shot, sending exactly one POST query: + +* 1 archive (content-type ``application/zip`` or ``application/x-tar``) +* 1 metadata file in atom xml format (``content-type: application/atom+xml;type=entry``) + +For this, we need to provide the: + +* arguments: ``--username 'name' --password 'pass'`` as credentials +* archive's path (example: ``--archive path/to/archive-name.tgz``) +* (optionally) metadata file's path ``--metadata + path/to/file.metadata.xml``. 
If not provided, the archive's filename + will be used to determine the metadata file, e.g.: + ``path/to/archive-name.tgz.metadata.xml`` +* (optionally) ``--slug 'your-id'`` argument, a reference to a + unique identifier the client uses for the software object. + +You can do this with the following command: + +minimal deposit + +.. code:: shell + + $ swh-deposit --username name --password secret \ + --archive je-suis-gpl.tgz + +with client's external identifier (``slug``) + +.. code:: shell + + $ swh-deposit --username name --password secret \ + --archive je-suis-gpl.tgz \ + --slug 123456 + +to a specific client's collection + +.. code:: shell + + $ swh-deposit --username name --password secret \ + --archive je-suis-gpl.tgz \ + --collection 'second-collection' + + + +You just posted a deposit to your collection on Software Heritage. + + +If everything went well, the successful response will contain the +elements below: + +.. code:: shell + + { + 'deposit_status': 'deposited', + 'deposit_id': '7', + 'deposit_date': 'Jan. 29, 2018, 12:29 p.m.' + } + +Note: As the deposit is in ``deposited`` status, you can no longer +update the deposit after this query. Any such attempt will be answered with a +403 Forbidden response. + +If something went wrong, an equivalent response will be given with the +``error`` and ``detail`` keys explaining the issue, e.g.: + +.. code:: shell + + { + 'error': 'Unknown collection name xyz', + 'detail': None, + 'deposit_status': None, + 'deposit_status_detail': None, + 'deposit_swh_id': None, + 'status': 404 + } + + + +multisteps deposit +^^^^^^^^^^^^^^^^^^^^^^^^^ +The steps to create a multisteps deposit: + +1. Create an incomplete deposit +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +First use the ``--partial`` argument to declare there is more to come + +.. code:: shell + + $ swh-deposit --username name --password secret --partial \ + --archive foo.tar.gz + + +2. 
Add content or metadata to the deposit +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Continue the deposit by passing the ``--deposit-id`` argument with the id returned +in the response to the first step. You can continue adding content or metadata +as long as you use the ``--partial`` argument. + +.. code:: shell + + $ swh-deposit --username name --password secret --partial \ + --archive add-foo.tar.gz \ + --deposit-id 42 + + +In case you want to add only one new archive without metadata: + +.. code:: shell + + $ swh-deposit --username name --password secret --partial \ + --archive add-foo.tar.gz \ + --archive-deposit \ + --deposit-id 42 + +If you want to add only metadata, use: + +.. code:: shell + + $ swh-deposit --username name --password secret --partial \ + --metadata add-foo.tar.gz.metadata.xml \ + --metadata-deposit \ + --deposit-id 42 + +3. Finalize deposit +~~~~~~~~~~~~~~~~~~~ +On your last addition, by not declaring it as ``--partial``, the +deposit will be considered as completed and its status will be changed +to ``deposited``. + + + +Update deposit +---------------- +* replace deposit: + + - only possible if the deposit status is ``partial`` and + ``--deposit-id <id>`` is provided + - by using the ``--replace`` flag + - ``--metadata-deposit`` replaces associated existing metadata + - ``--archive-deposit`` replaces associated archive(s) + - by default, with no flag or both, you'll replace associated + metadata and archive(s) + +.. code:: shell + + $ swh-deposit --username name --password secret --replace \ + --deposit-id 11 \ + --archive updated-je-suis-gpl.tar.gz + +* update a loaded deposit with a new version: + + - by using the external-id with the ``--slug`` argument, you will + link the new deposit with its parent deposit + +.. code:: shell + + $ swh-deposit --username name --password secret --slug '123456' \ + --archive je-suis-gpl-v2.tgz + + + +Check the deposit's status +-------------------------- + +You can check the status of the deposit by using the ``--deposit-id`` argument: + +.. 
code:: shell + + $ swh-deposit --username name --password secret --deposit-id '11' --status + +.. code:: json + + { + 'deposit_id': '11', + 'deposit_status': 'deposited', + 'deposit_swh_id': None, + 'deposit_status_detail': 'Deposit is ready for additional checks \ + (tarball ok, metadata, etc...)' + } + +The different statuses: + +- *partial*: multipart deposit is still ongoing +- *deposited*: deposit completed +- *rejected*: deposit failed the checks +- *verified*: content and metadata verified +- *loading*: loading in progress +- *done*: loading completed successfully +- *failed*: the deposit loading has failed + +When the deposit has been loaded into the archive, the status will be +marked ``done``. The response will also contain the +``deposit_swh_id``. For example: + +.. code:: json + + { + 'deposit_id': '11', + 'deposit_status': 'done', + 'deposit_swh_id': 'swh:1:rev:34898aa991c90b447c27d2ac1fc09f5c8f12783e', + 'deposit_status_detail': 'The deposit has been successfully \ + loaded into the Software Heritage archive' + } diff --git a/docs/index.rst b/docs/index.rst index 9ec3e948..98965b86 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,22 +1,22 @@ .. _swh-deposit: Software Heritage Deposit ========================= .. toctree:: - :maxdepth: 3 + :maxdepth: 1 :caption: Contents: - getting-started.md - spec-api.md - metadata.md - spec-loading.md - dev-info.md - sys-info.md + getting-started.rst + spec-api.rst + metadata.rst + spec-loading.rst + dev-info.rst + sys-info.rst Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` diff --git a/docs/metadata.md b/docs/metadata.md deleted file mode 100644 index c3cb6073..00000000 --- a/docs/metadata.md +++ /dev/null @@ -1,166 +0,0 @@ -# Deposit metadata - -When making a software deposit into the SWH archive, one can add information -describing the software artifact and the software project. 
-and the metadata will be translated to the [CodeMeta v.2](https://doi.org/10.5063/SCHEMA/CODEMETA-2.0) vocabulary -if possible. - -## Metadata requirements - -MUST -- **the schema/vocabulary** used *MUST* be specified with a persistent url -(DublinCore, DOAP, CodeMeta, etc.) -```XML - -or - -or - -``` -- **the url** representing the location of the source *MUST* be provided -under the url tag. The url will be used for creating an origin object in the -archive. -```XML -www.url-example.com -or -www.url-example.com -or -www.url-example.com -``` -- **the external_identifier** *MUST* be provided as an identifier -- **the name** of the software deposit *MUST* be provided -[atom:title, codemeta:name, dcterms:title] -- **the authors** of the software deposit *MUST* be provided - - -SHOULD -- **the external_identifier** *SHOULD* match the Slug external-identifier in -the header -- **the description** of the software deposit *SHOULD* be provided -[codemeta:description] - short or long description of the software -- **the license/s** of the software deposit *SHOULD* be provided -[codemeta:license] - - -MAY -- other metadata *MAY* be added with terms defined by the schema in use. 
- -## Examples -### Using only Atom -```XML - - - Awesome Compiler - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 1785io25c695 - 2017-10-07T15:17:08Z - some awesome author - -``` -### Using Atom with CodeMeta -```XML - - - Awesome Compiler - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 1785io25c695 - 1785io25c695 - origin url - other identifier, DOI, ARK - Domain - - description - key-word 1 - key-word 2 - creation date - publication date - comment - - article name - article id - - - Collaboration/Projet - project name - id - - see also - Sponsor A - Sponsor B - Platform/OS - dependencies - Version - active - - license - url spdx - - .Net Framework 3.0 - Python2.3 - - author1 - Inria - UPMC - - - author2 - Inria - UPMC - - http://code.com - language 1 - language 2 - http://issuetracker.com - -``` -### Using Atom with DublinCore and CodeMeta (multi-schema entry) -``` XML - - - Awesome Compiler - hal - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - %s - hal-01587361 - doi:10.5281/zenodo.438684 - The assignment problem - AffectationRO - author - [INFO] Computer Science [cs] - [INFO.INFO-RO] Computer Science [cs]/Operations Research [cs.RO] - SOFTWARE - Project in OR: The assignment problemA java implementation for the assignment problem first release - description fr - 2015-06-01 - 2017-10-19 - en - - - origin url - - 1.0.0 - key word - Comment - Rfrence interne - - link - Sponsor - - Platform/OS - dependencies - Ended - - license - url spdx - - - http://code.com - language 1 - language 2 - -``` diff --git a/docs/metadata.rst b/docs/metadata.rst new file mode 100644 index 00000000..543128a3 --- /dev/null +++ b/docs/metadata.rst @@ -0,0 +1,187 @@ +Deposit metadata +================ + +When making a software deposit into the SWH archive, one can add +information describing the software artifact and the software project. 
+ + +Metadata requirements +--------------------- + +- **the schema/vocabulary** used *MUST* be specified with a persistent url + (DublinCore, DOAP, CodeMeta, etc.) + + .. code:: xml + + + or + + or + + +- **the url** representing the location of the source *MUST* be provided under + the url tag. The url will be used for creating an origin object in the + archive. + + .. code:: xml + + www.url-example.com + or + www.url-example.com + or + www.url-example.com + +- **the external\_identifier** *MUST* be provided as an identifier + +- **the name** of the software deposit *MUST* be provided [atom:title, + codemeta:name, dcterms:title] + +- **the authors** of the software deposit *MUST* be provided + +- **the external\_identifier** *SHOULD* match the Slug external-identifier in + the header + +- **the description** of the software deposit *SHOULD* be provided + [codemeta:description] - short or long description of the software + +- **the license/s** of the software deposit *SHOULD* be provided + [codemeta:license] + +- other metadata *MAY* be added with terms defined by the schema in use. + +Examples +-------- + +Using only Atom +~~~~~~~~~~~~~~~ + +.. code:: xml + + + + Awesome Compiler + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 1785io25c695 + 2017-10-07T15:17:08Z + some awesome author + + +Using Atom with CodeMeta +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code:: xml + + + + Awesome Compiler + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 1785io25c695 + 1785io25c695 + origin url + other identifier, DOI, ARK + Domain + + description + key-word 1 + key-word 2 + creation date + publication date + comment + + article name + article id + + + Collaboration/Projet + project name + id + + see also + Sponsor A + Sponsor B + Platform/OS + dependencies + Version + active + + license + url spdx + + .Net Framework 3.0 + Python2.3 + + author1 + Inria + UPMC + + + author2 + Inria + UPMC + + http://code.com + language 1 + language 2 + http://issuetracker.com + + +Using Atom with DublinCore and CodeMeta (multi-schema entry) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: xml + + + + Awesome Compiler + hal + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + %s + hal-01587361 + doi:10.5281/zenodo.438684 + The assignment problem + AffectationRO + author + [INFO] Computer Science [cs] + [INFO.INFO-RO] Computer Science [cs]/Operations Research [cs.RO] + SOFTWARE + Project in OR: The assignment problemA java implementation for the assignment problem first release + description fr + 2015-06-01 + 2017-10-19 + en + + + origin url + + 1.0.0 + key word + Comment + Rfrence interne + + link + Sponsor + + Platform/OS + dependencies + Ended + + license + url spdx + + + http://code.com + language 1 + language 2 + + +Note +---- +We aim on harmonizing the metadata from different origins and thus +metadata will be translated to the `CodeMeta +v.2 `__ vocabulary if +possible. diff --git a/docs/spec-api.md b/docs/spec-api.md deleted file mode 100644 index b57785d3..00000000 --- a/docs/spec-api.md +++ /dev/null @@ -1,810 +0,0 @@ -# API Specification - -This is [Software Heritage](https://www.softwareheritage.org)'s -[SWORD 2.0](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) Server -implementation. 
- -**S.W.O.R.D** (**S**imple **W**eb-Service **O**ffering **R**epository -**D**eposit) is an interoperability standard for digital file deposit. - -This implementation will permit interaction between a client (a -repository) and a server (SWH repository) to permit deposits of -software source code archives and associated metadata. - -*Note:* - - In the following document, we will use the `archive` or `software - source code archive` interchangeably. - - The supported archive formats are: - - zip: common zip archive (no multi-disk zip files). - - tar: tar archive without compression or optionally any of the - following compression algorithm gzip (.tar.gz, .tgz), bzip2 - (.tar.bz2) , or lzma (.tar.lzma) - - -## Collection - -SWORD defines a `collection` concept. In SWH's case, this collection -refers to a group of deposits. A `deposit` is some form of software -source code archive(s) associated with metadata. - -*Note:* It may be multiple archives if one archive is too big and must be -splitted into multiple smaller ones. - -### Example - -As part of the -[HAL](https://hal.archives-ouvertes.fr/)-[SWH](https://www.softwareheritage.org) -collaboration, we define a `HAL collection` to which the `hal` client -will have access to. - -## Limitations - -We will not have a fully compliant SWORD 2.0 protocol at first, so -voluntary implementation shortcomings can exist, for example, only zip -tarballs will be accepted. 
- -Other more permanent limitations exists: -- upload limitation of 100Mib -- no mediation - -## Endpoints - -Here are the defined endpoints this document will refer to from this -point on: - -- `/1/servicedocument/` *service document iri* (a.k.a [SD-IRI](#sd-iri-the-service-document-iri)) - - *Goal:* For a client to discover its collection's location - -- `/1//` *collection iri* (a.k.a [COL-IRI](#col-iri-the-collection-iri)) - - *Goal:*: create deposit to a collection - -- `/1///media/` *update iri* (a.k.a [EM-IRI](#em-iri-the-atom-edit-media-iri)) - - *Goal:*: Add or replace archive(s) to a deposit - -- `/1///metadata/` *update iri* (a.k.a [EDIT-IRI](#edit-iri-the-atom-entry-edit-iri) merged with [SE-IRI](#se-iri-the-sword-edit-iri)) - - *Goal:*: Add or replace metadata (and optionally archive(s) to a - deposit - - -- `/1///status/` *state iri* (a.k.a [STATE-IRI](#state-iri-the-sword-statement-iri)) - - *Goal:*: Display deposit's status in regards to loading - -- `/1///content/` *content iri* (a.k.a [CONT-FILE-IRI](#cont-iri-the-content-iri)) - - *Goal:*: Display information on the content's representation in the - sword server - -## Use cases - -### Deposit creation - -From client's deposit repository server to SWH's repository server: - -[1.] The client requests for the server's abilities and its associated -collection (GET query to the *SD/service document uri*) - -[2.] The server answers the client with the service document which gives - the *collection uri* (also known as *COL/collection IRI*). - -[3.] The client sends a deposit (optionally a zip archive, some metadata -or both) through the *collection uri*. - -This can be done in: -- one POST request (metadata + archive). -- one POST request (metadata or archive) + other PUT or POST request - to the *update uris* (*edit-media iri* or *edit iri*) - - [3.1.] Server validates the client's input or returns detailed error if any - - [3.2.] 
Server stores information received (metadata or software - archive source code or both) - -[4.] The server notifies the client it acknowledged the client's -request. An `http 201 Created` response with a deposit receipt in the -body response is sent back. That deposit receipt will hold the -necessary information to eventually complete the deposit later on if -it was incomplete (also known as status `partial`). - -#### Schema representation - - - -![](/images/deposit-create-chart.png) - -### Updating an existing deposit - -[5.] Client updates existing deposit through the *update uris* (one or -more POST or PUT requests to either the *edit-media iri* or *edit -iri*). - - [5.1.] Server validates the client's input or returns detailed error - if any - - [5.2.] Server stores information received (metadata or software - archive source code or both) - -This would be the case for example if the client initially posted a -`partial` deposit (e.g. only metadata with no archive, or an archive -without metadata, or a splitted archive because the initial one -exceeded the limit size imposed by swh repository deposit) - -#### Schema representation - - - -![](/images/deposit-update-chart.png) - -### Deleting deposit (or associated archive, or associated metadata) - -[6.] Deposit deletion is possible as long as the deposit is still in - `partial` state. - - [6.1.] Server validates the client's input or returns detailed error - if any - - [6.2.] Server actually delete information according to request - -#### Schema representation - - - -![](/images/deposit-delete-chart.png) - -### Client asks for operation status - -[7.] Operation status can be read through a GET query to the *state - iri*. - -### Server: Triggering deposit checks - -Once the status `deposited` is reached for a deposit, checks for the -associated archive(s) and metadata will be triggered. If those checks -fail, the status is changed to `rejected` and nothing more happens -there. 
Otherwise, the status is changed to `verified`. - -### Server: Triggering deposit load - -Once the status `verified` is reached for a deposit, loading the -deposit with its associated metadata will be triggered. - -The loading will result on status update, either `done` or `failed` -(depending on the loading's status). - -This is described in the [loading document](./spec-loading.html). - -## API overview - -API access is over HTTPS. - -The API is protected through basic authentication. - -The API endpoints are rooted at -[https://deposit.softwareheritage.org/1/](https://deposit.softwareheritage.org/1/). - -Data is sent and received as XML (as specified in the SWORD 2.0 specification). - -In the following chapters, we will described the different endpoints -[through the use cases described previously.](#use-cases) - -### [2] Service document - -Endpoint: GET /1/servicedocument/ - -This is the starting endpoint for the client to discover its initial -collection. The answer to this query will describes: -- the server's abilities -- connected client's collection information - -Also known as: [SD-IRI - The Service Document IRI](#sd-iri-the-service-document-iri). - -#### Sample request - -``` Shell -GET https://deposit.softwareheritage.org/1/servicedocument/ HTTP/1.1 -Host: deposit.softwareheritage.org -``` - -The server returns its abilities with the service document in xml format: -- protocol sword version v2 -- accepted mime types: application/zip (zip), application/x-tar (tar - archive with any of the following optional compression algorithm - gzip, bzip2, or lzma) -- upload max size accepted. Beyond that point, it's expected the - client splits its tarball into multiple ones -- the collection the client can act upon (swh supports only one - software collection per client) -- mediation is not supported -- etc... 
- -The current answer for example for the -[hal archive](https://hal.archives-ouvertes.fr/) is: - -``` XML - - - - 2.0 - 20971520 - - - The Software Heritage (SWH) archive - - SWH Software Archive - application/zip - application/x-tar - Collection Policy - Software Heritage Archive - false - false - Collect, Preserve, Share - http://purl.org/net/sword/package/SimpleZip - https://deposit.softwareheritage.org/1/hal/ - - - -``` - -### [3|5] Deposit creation/update - -The client can send deposit creation/update through a series of -deposit requests to the following endpoints: -- *collection iri* (COL-IRI) to initialize a deposit -- *update iris* (EM-IRI, EDIT-SE-IRI) to complete/finalize a deposit - -The deposit creation/update can also happens in one request. - -The deposit request can contain: -- an archive holding the software source code (binary upload) -- an envelop with metadata describing information regarding a deposit - (atom entry deposit) -- or both (multipart deposit, exactly one archive and one envelop). - -#### Request Types - -##### Binary deposit - -The client can deposit a binary archive, supplying the following headers: -- Content-Type (text): accepted mimetype -- Content-Length (int): tarball size -- Content-MD5 (text): md5 checksum hex encoded of the tarball -- Content-Disposition (text): attachment; filename=[filename] ; the filename - parameter must be text (ascii) -- Packaging (IRI): http://purl.org/net/sword/package/SimpleZip -- In-Progress (bool): true to specify it's not the last request, false - to specify it's a final request and the server can go on with - processing the request's information (if not provided, this is - considered false, so final). - -This is a single zip archive deposit. Almost no metadata is associated -with the archive except for the unique external identifier. - -*Note:* This kind of deposit should be `partial` (In-Progress: True) as -almost no metadata can be associated with the uploaded archive. 
- -##### API endpoints concerned - -POST /1// Create a first deposit with one - archive -PUT /1///media/ Replace existing archives -POST /1///media/ Add new archive - -##### Sample request - -``` Shell -curl -i -u hal: \ - --data-binary @swh/deposit.zip \ - -H 'In-Progress: false' -H 'Content-MD5: 0faa1ecbf9224b9bf48a7c691b8c2b6f' \ - -H 'Content-Disposition: attachment; filename=[deposit.zip]' \ - -H 'Slug: some-external-id' \ - -H 'Packaging: http://purl.org/net/sword/package/SimpleZIP' \ - -H 'Content-type: application/zip' \ - -XPOST https://deposit.softwareheritage.org/1/hal/ -``` - -#### Atom entry deposit - -The client can deposit an xml body holding metadata information on the -deposit. - -*Note:* This kind of deposit is mostly expected to be `partial` -(In-Progress: True) since no archive will be associated to those -metadata. - -##### API endpoints concerned - -POST /1// Create a first atom deposit entry -PUT /1///metadata/ Replace existing metadata -POST /1///metadata/ Add new metadata to deposit - -##### Sample request - -Sample query: - -``` Shell -curl -i -u hal: --data-binary @atom-entry.xml \ --H 'In-Progress: false' \ --H 'Slug: some-external-id' \ --H 'Content-Type: application/atom+xml;type=entry' \ --XPOST https://deposit.softwareheritage.org/1/hal/ - -HTTP/1.0 201 Created -Date: Tue, 26 Sep 2017 10:32:35 GMT -Server: WSGIServer/0.2 CPython/3.5.3 -Vary: Accept, Cookie -Allow: GET, POST, PUT, DELETE, HEAD, OPTIONS -Location: /1/hal/10/metadata/ -X-Frame-Options: SAMEORIGIN -Content-Type: application/xml - - - 10 - Sept. 26, 2017, 10:32 a.m. 
- None - deposited - - - - - - - - - - - http://purl.org/net/sword/package/SimpleZip - -``` - -Sample body: - -``` XML - - Title - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2005-10-07T17:17:08Z - Contributor - The abstract - - - The abstract - Access Rights - Alternative Title - Date Available - Bibliographic Citation # noqa - Contributor - Description - Has Part - Has Version - Identifier - Is Part Of - Publisher - References - Rights Holder - Source - Title - Type - - -``` - -#### One request deposit / Multipart deposit - -The one request deposit is a single request containing both the -metadata (as atom entry attachment) and the archive (as payload -attachment). Thus, it is a multipart deposit. - -Client provides: -- Content-Disposition (text): header of type 'attachment' on the Entry - Part with a name parameter set to 'atom' -- Content-Disposition (text): header of type 'attachment' on the Media - Part with a name parameter set to payload and a filename parameter - (the filename will be expressed in ASCII). -- Content-MD5 (text): md5 checksum hex encoded of the tarball -- Packaging (text): http://purl.org/net/sword/package/SimpleZip - (packaging format used on the Media Part) -- In-Progress (bool): true|false; true means `partial` upload and we can expect - other requests in the future, false means the deposit is done. 
-- add metadata formats or foreign markup to the atom:entry element - -##### API endpoints concerned - -POST /1// Create a full deposit (metadata + archive) -PUT /1///metadata/ Replace existing metadata and archive -POST /1///metadata/ Add new metadata and archive to deposit - -##### Sample request - -Sample query: - -``` Shell -curl -i -u hal: \ - -F "file=@../deposit.json;type=application/zip;filename=payload" \ - -F "atom=@../atom-entry.xml;type=application/atom+xml;charset=UTF-8" \ - -H 'In-Progress: false' \ - -H 'Slug: some-external-id' \ - -XPOST https://deposit.softwareheritage.org/1/hal/ - -HTTP/1.0 201 Created -Date: Tue, 26 Sep 2017 10:11:55 GMT -Server: WSGIServer/0.2 CPython/3.5.3 -Vary: Accept, Cookie -Allow: GET, POST, PUT, DELETE, HEAD, OPTIONS -Location: /1/hal/9/metadata/ -X-Frame-Options: SAMEORIGIN -Content-Type: application/xml - - - 9 - Sept. 26, 2017, 10:11 a.m. - payload - deposited - - - - - - - - - - - http://purl.org/net/sword/package/SimpleZip - -``` - -Sample content: - -``` XML -POST deposit HTTP/1.1 -Host: deposit.softwareheritage.org -Content-Length: [content length] -Content-Type: multipart/related; - boundary="===============1605871705=="; - type="application/atom+xml" -In-Progress: false -MIME-Version: 1.0 - -Media Post ---===============1605871705== -Content-Type: application/atom+xml; charset="utf-8" -Content-Disposition: attachment; name="atom" -MIME-Version: 1.0 - - - - Title - hal-or-other-archive-id - 2005-10-07T17:17:08Z - Contributor - - - The abstract - Access Rights - Alternative Title - Date Available - Bibliographic Citation # noqa - Contributor - Description - Has Part - Has Version - Identifier - Is Part Of - Publisher - References - Rights Holder - Source - Title - Type - ---===============1605871705== -Content-Type: application/zip -Content-Disposition: attachment; name=payload; filename=[filename] -Packaging: http://purl.org/net/sword/package/SimpleZip -Content-MD5: [md5-digest] -MIME-Version: 1.0 - -[...binary 
package data...] ---===============1605871705==-- -``` - -## Deposit Creation - server point of view - -The server receives the request(s) and does minimal checking on the -input prior to any saving operations. - -### [3|5|6.1] Validation of the header and body request - -Any kind of errors can happen, here is the list depending on the -situation: - -- common errors: - - 401 (unauthenticated) if a client does not provide credential or - provide wrong ones - - 403 (forbidden) if a client tries access to a collection it does - not own - - 404 (not found) if a client tries access to an unknown collection - - 404 (not found) if a client tries access to an unknown deposit - - 415 (unsupported media type) if a wrong media type is - provided to the endpoint - -- archive/binary deposit: - - 403 (forbidden) if the length of the archive exceeds the - max size configured - - 412 (precondition failed) if the length or hash provided - mismatch the reality of the archive. - - 415 (unsupported media type) if a wrong media type is - provided - -- multipart deposit: - - 412 (precondition failed) if the md5 hash provided mismatch the - reality of the archive - - 415 (unsupported media type) if a wrong media type is - provided - -- Atom entry deposit: - - 400 (bad request) if the request's body is empty (for creation only) - -### [3|5|6.2] Server uploads the content in a temporary location - -Using an objstorage, the server stores the archive in a temporary -location. It's deemed temporary the time the deposit is completed -(status becomes `deposited`) and the loading finishes. - -The server also persists requests' information in a database. - -### [4] Servers answers the client - -If everything went well, the server answers either with a 200, 201 or -204 response (depending on the actual endpoint) - -A `http 200` response is returned for GET endpoints. - -A `http 201 Created` response is returned for POST endpoints. The -body holds the deposit receipt. 
The headers holds the EDIT-IRI in the -Location header of the response. - -A `http 204 No Content` response is returned for PUT, DELETE -endpoints. - -If something went wrong, the server answers with one of the -[error status code and associated message mentioned](#possible errors)). - - -### [5] Deposit Update - -The client previously deposited a `partial` document (through an -archive, metadata, or both). The client wants to update information -for that previous deposit (possibly in multiple steps as well). - -The important thing to note here is that, as long as the deposit is in -status `partial`, the loading did not start. Thus, the client can -update information (replace or add new archive, new metadata, even -delete) for that same `partial` deposit. - -When the deposit status changes to `deposited`, the client can -no longer change the deposit's information (a 403 will be returned in -that case). - -Then aggregation of all those deposit's information will later be used -for the actual loading. - -Providing the collection name, and the identifier of the previous -deposit id received from the deposit receipt, the client executes a -POST or PUT request on the *update iris*. - -After validation of the body request, the server: -- uploads such content in a temporary location - -- answers the client an `http 204 (No content)`. In the Location - header of the response lies an iri to permit further update. - -- Asynchronously, the server will inject the archive uploaded and the - associated metadata. An operation status endpoint *state iri* - permits the client to query the loading operation status. 
- -#### Possible update endpoints - -PUT /1///media/ Replace existing archives for the deposit -POST /1///media/ Add new archives to the deposit -PUT /1///metadata/ Replace existing metadata (and possible archives) -POST /1///metadata/ Add new metadata - -### [6] Deposit Removal - -As long as the deposit's status remains `partial`, it's possible to -remove the deposit entirely or remove only the deposit's archive(s). - -If the deposit has been removed, further querying that deposit will -return a *404* response. - -If the deposit's archive(s) has been removed, we can still ensue other -query to update that deposit. - -### Operation Status - -Providing a collection name and a deposit id, the client asks the -operation status of a prior deposit. - -URL: GET /1///status/ - -This returns: -- *201* response with the actual status -- *404* if the deposit does not exist (or no longer does) - -## Possible errors - -### sword:ErrorContent - -IRI: `http://purl.org/net/sword/error/ErrorContent` - -The supplied format is not the same as that identified in the -Packaging header and/or that supported by the server Associated HTTP - -Associated HTTP status: *415 (Unsupported Media Type)* - -### sword:ErrorChecksumMismatch - -IRI: `http://purl.org/net/sword/error/ErrorChecksumMismatch` - -Checksum sent does not match the calculated checksum. - -Associated HTTP status: *412 Precondition Failed* - -### sword:ErrorBadRequest - -IRI: `http://purl.org/net/sword/error/ErrorBadRequest` - -Some parameters sent with the POST/PUT were not understood. - -Associated HTTP status: *400 Bad Request* - -### sword:MediationNotAllowed - -IRI: `http://purl.org/net/sword/error/MediationNotAllowed` - -Used where a client has attempted a mediated deposit, but this is not -supported by the server. 
- -Associated HTTP status: *412 Precondition Failed* - -### sword:MethodNotAllowed - -IRI: `http://purl.org/net/sword/error/MethodNotAllowed` - -Used when the client has attempted one of the HTTP update verbs (POST, -PUT, DELETE) but the server has decided not to respond to such -requests on the specified resource at that time. - -Associated HTTP Status: *405 Method Not Allowed* - -### sword:MaxUploadSizeExceeded - -IRI: `http://purl.org/net/sword/error/MaxUploadSizeExceeded` - -Used when the client has attempted to supply to the server a file -which exceeds the server's maximum upload size limit - -Associated HTTP Status: *413 (Request Entity Too Large)* - -### sword:Unauthorized - -IRI: `http://purl.org/net/sword/error/ErrorUnauthorized` - -The access to the api is through authentication. - -Associated HTTP status: *401* - -### sword:Forbidden - -IRI: `http://purl.org/net/sword/error/ErrorForbidden` - -The action is forbidden (access to another collection for example). - -Associated HTTP status: *403* - -## Nomenclature - -SWORD uses IRI notion, Internationalized Resource Identifier. In this -chapter, we will describe SWH's IRIs. - -### SD-IRI - The Service Document IRI - -The Service Document IRI. This is the IRI from which the client can -discover its collection IRI. - -HTTP verbs supported: *GET* - -### Col-IRI - The Collection IRI - -The software collection associated to one user. - -The SWORD Collection IRI is the IRI to which the initial deposit will -take place, and which is listed in the Service Document. - -Following our previous example, this is: -https://deposit.softwareheritage.org/1/hal/. - -HTTP verbs supported: *POST* - -### Cont-IRI - The Content IRI - -This is the endpoint which permits the client to retrieve -representations of the object as it resides in the SWORD server. - -This will display information about the content and its associated -metadata. - -HTTP verbs supported: *GET* - -*Note:* We also refer to it as *Cont-File-IRI*. 
- -### EM-IRI - The Atom Edit Media IRI - -This is the endpoint to upload other related archives for the same -deposit. - -It is used to change a `partial` deposit in regards of archives, in -particular: -- replace existing archives with new ones -- add new archives -- delete archives from a deposit - -Example use case: -A first archive to put exceeds the deposit's limit size. -The client can thus split the archives in multiple ones. -Post a first `partial` archive to the Col-IRI (with In-Progress: - -True). Then, in order to complete the deposit, POST the other -remaining archives to the EM-IRI (the last one with the In-Progress -header to False). - -HTTP verbs supported: *POST*, *PUT*, *DELETE* - -### Edit-IRI - The Atom Entry Edit IRI - -This is the endpoint to change a `partial` deposit in regards of -metadata. In particular: -- replace existing metadata (and archives) with new ones -- add new metadata (and archives) -- delete deposit - -HTTP verbs supported: *POST*, *PUT*, *DELETE* - -*Note:* We also refer to it as *Edit-SE-IRI*. - -### SE-IRI - The SWORD Edit IRI - -The sword specification permits to merge this with EDIT-IRI, so we -did. - -*Note:* We also refer to it as *Edit-SE-IRI*. - -### State-IRI - The SWORD Statement IRI - -This is the IRI which can be used to retrieve a description of the -object from the sword server, including the structure of the object -and its state. This will be used as the operation status endpoint. 
- -HTTP verbs supported: *GET* - -## Sources - -- [SWORD v2 specification](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) -- [arxiv documentation](https://arxiv.org/help/submit_sword) -- [Dataverse example](http://guides.dataverse.org/en/4.3/api/sword.html) -- [SWORD used on HAL](https://api.archives-ouvertes.fr/docs/sword) -- [xml examples for CCSD](https://github.com/CCSDForge/HAL/tree/master/Sword) diff --git a/docs/spec-api.rst b/docs/spec-api.rst new file mode 100644 index 00000000..07c84649 --- /dev/null +++ b/docs/spec-api.rst @@ -0,0 +1,750 @@ +API Specification +================= + +This is `Software Heritage <https://www.softwareheritage.org>`__'s +`SWORD +2.0 <http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html>`__ +Server implementation. + +**S.W.O.R.D** (**S**\ imple **W**\ eb-Service **O**\ ffering +**R**\ epository **D**\ eposit) is an interoperability standard for +digital file deposit. + +This implementation will permit interaction between a client (a repository) and +a server (SWH repository) to push deposits of software source code archives +with associated metadata. + +*Note:* + +* In the following document, we will use the terms ``archive`` and ``software + source code archive`` interchangeably. +* The supported archive formats are: + + * zip: common zip archive (no multi-disk zip files). + * tar: tar archive without compression or optionally any of the following + compression algorithms: gzip (.tar.gz, .tgz), bzip2 (.tar.bz2), or lzma + (.tar.lzma) + +Collection +---------- + +SWORD defines a ``collection`` concept. In SWH's case, this collection +refers to a group of deposits. A ``deposit`` is some form of software +source code archive(s) associated with metadata. +By default the client's collection will have the client's name. + +Limitations +----------- +* upload limitation of 100MiB +* no mediation + +API overview +------------ + +API access is over HTTPS. + +The API is protected through basic authentication. + +The API endpoints are rooted at https://deposit.softwareheritage.org/1/. 
+ +Data is sent and received as XML (as specified in the SWORD 2.0 +specification). + +Endpoints +--------- + +* ``/1/servicedocument/`` *service document iri* (a.k.a `SD-IRI + <#sd-iri-the-service-document-iri>`__) + + *Goal:* For a client to discover its collection's location + +* ``/1/<collection-name>/`` *collection iri* (a.k.a `COL-IRI + <#col-iri-the-collection-iri>`__) + + *Goal:* Create a deposit in a collection + +* ``/1/<collection-name>/<deposit-id>/media/`` *update iri* (a.k.a + `EM-IRI <#em-iri-the-atom-edit-media-iri>`__) + + *Goal:* Add or replace archive(s) to a deposit + +* ``/1/<collection-name>/<deposit-id>/metadata/`` *update iri* (a.k.a `EDIT-IRI + <#edit-iri-the-atom-entry-edit-iri>`__ merged with `SE-IRI + <#se-iri-the-sword-edit-iri>`__) + + *Goal:* Add or replace metadata (and optionally archive(s)) to a deposit + +* ``/1/<collection-name>/<deposit-id>/status/`` *state iri* (a.k.a `STATE-IRI + <#state-iri-the-sword-statement-iri>`__) + + *Goal:* Display deposit's status with regard to loading + +* ``/1/<collection-name>/<deposit-id>/content/`` *content iri* (a.k.a + `CONT-FILE-IRI <#cont-iri-the-content-iri>`__) + + *Goal:* Display information on the content's representation in the sword + server + + +Service document request +~~~~~~~~~~~~~~~~~~~~~~~~ + +Endpoint: GET /1/servicedocument/ + +This is the starting endpoint for the client to discover its initial +collection. The answer to this query will describe: + +* the server's abilities +* connected client's collection information + + Also known as: `SD-IRI - The Service Document IRI + <#sd-iri-the-service-document-iri>`__. + +Sample request +^^^^^^^^^^^^^^ + +.. code:: shell + + GET https://deposit.softwareheritage.org/1/servicedocument/ HTTP/1.1 + Host: deposit.softwareheritage.org + +The server returns its abilities with the service document in xml format: + +* protocol sword version v2 +* accepted mime types: application/zip (zip), application/x-tar (tar archive + with any of the following optional compression algorithms: gzip, bzip2, or + lzma) +* upload max size accepted. 
Beyond that point, it's expected the client splits + its tarball into multiple ones +* the collection the client can act upon (swh supports only one software + collection per client) +* mediation is not supported + +For example, the current answer for the `HAL archive +<https://hal.archives-ouvertes.fr/>`__ is: + +.. code:: xml + + + + + 2.0 + 20971520 + + + The Software Heritage (SWH) archive + + SWH Software Archive + application/zip + application/x-tar + Collection Policy + Software Heritage Archive + false + false + Collect, Preserve, Share + http://purl.org/net/sword/package/SimpleZip + https://deposit.softwareheritage.org/1/hal/ + + + + +Deposit creation/update +~~~~~~~~~~~~~~~~~~~~~~~ + +The client can send deposit creation/update through a series of deposit +requests to the following endpoints: + +* *collection iri* (COL-IRI) to initialize a deposit +* *update iris* (EM-IRI, EDIT-SE-IRI) to complete/finalize a deposit + +The deposit creation/update can also happen in one request. + +The deposit request can contain: + +* an archive holding the software source code (binary upload) +* an envelope with metadata describing information regarding a deposit (atom + entry deposit) +* or both (multipart deposit, exactly one archive and one envelope). + +Request Types +^^^^^^^^^^^^^ + +Binary deposit +'''''''''''''' + +The client can deposit a binary archive, supplying the following +headers: + +* Content-Type (text): accepted mimetype +* Content-Length (int): tarball size +* Content-MD5 (text): md5 checksum hex encoded of the tarball +* Content-Disposition (text): attachment; filename=[filename] ; the filename + parameter must be text (ascii) +* Packaging (IRI): http://purl.org/net/sword/package/SimpleZip +* In-Progress (bool): true to specify it's not the last request, false to + specify it's a final request and the server can go on with processing the + request's information (if not provided, this is considered false, so final). + +This is a single zip archive deposit. 
Almost no metadata is associated +with the archive except for the unique external identifier. + +*Note:* This kind of deposit should be ``partial`` (In-Progress: True) +as almost no metadata can be associated with the uploaded archive. + +API endpoints concerned +''''''''''''''''''''''' + +POST /1// Create a first deposit with one archive PUT /1///media/ +Replace existing archives POST /1///media/ Add new archive + +Sample request +'''''''''''''' + +.. code:: shell + + curl -i -u hal: \ + --data-binary @swh/deposit.zip \ + -H 'In-Progress: false' -H 'Content-MD5: 0faa1ecbf9224b9bf48a7c691b8c2b6f' \ + -H 'Content-Disposition: attachment; filename=[deposit.zip]' \ + -H 'Slug: some-external-id' \ + -H 'Packaging: http://purl.org/net/sword/package/SimpleZIP' \ + -H 'Content-type: application/zip' \ + -XPOST https://deposit.softwareheritage.org/1/hal/ + +Atom entry deposit +^^^^^^^^^^^^^^^^^^ + +The client can deposit an xml body holding metadata information on the +deposit. + +*Note:* This kind of deposit is mostly expected to be ``partial`` +(In-Progress: True) since no archive will be associated to those +metadata. + +API endpoints concerned +''''''''''''''''''''''' + +POST /1// Create a first atom deposit entry PUT /1///metadata/ Replace +existing metadata POST /1///metadata/ Add new metadata to deposit + +Sample request +'''''''''''''' + +Sample query: + +.. code:: shell + + curl -i -u hal: --data-binary @atom-entry.xml \ + -H 'In-Progress: false' \ + -H 'Slug: some-external-id' \ + -H 'Content-Type: application/atom+xml;type=entry' \ + -XPOST https://deposit.softwareheritage.org/1/hal/ + + HTTP/1.0 201 Created + Date: Tue, 26 Sep 2017 10:32:35 GMT + Server: WSGIServer/0.2 CPython/3.5.3 + Vary: Accept, Cookie + Allow: GET, POST, PUT, DELETE, HEAD, OPTIONS + Location: /1/hal/10/metadata/ + X-Frame-Options: SAMEORIGIN + Content-Type: application/xml + + + 10 + Sept. 26, 2017, 10:32 a.m. 
+ None + deposited + + + + + + + + + + + http://purl.org/net/sword/package/SimpleZip + + +Sample body: + +.. code:: xml + + + Title + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2005-10-07T17:17:08Z + Contributor + The abstract + + + The abstract + Access Rights + Alternative Title + Date Available + Bibliographic Citation # noqa + Contributor + Description + Has Part + Has Version + Identifier + Is Part Of + Publisher + References + Rights Holder + Source + Title + Type + + + +One request deposit / Multipart deposit +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The one request deposit is a single request containing both the metadata +(as atom entry attachment) and the archive (as payload attachment). +Thus, it is a multipart deposit. + +Client provides: + +* Content-Disposition (text): header of type 'attachment' on the Entry Part + with a name parameter set to 'atom' +* Content-Disposition (text): header of type 'attachment' on the Media Part + with a name parameter set to payload and a filename parameter (the filename + will be expressed in ASCII). +* Content-MD5 (text): md5 checksum hex encoded of the tarball +* Packaging (text): http://purl.org/net/sword/package/SimpleZip (packaging + format used on the Media Part) +* In-Progress (bool): true\|false; true means ``partial`` upload and we can + expect other requests in the future, false means the deposit is done. +* add metadata formats or foreign markup to the atom:entry element + +API endpoints concerned +''''''''''''''''''''''' + +POST /1// Create a full deposit (metadata + archive) PUT /1///metadata/ +Replace existing metadata and archive POST /1///metadata/ Add new +metadata and archive to deposit + +Sample request +'''''''''''''' + +Sample query: + +.. 
code:: shell + + curl -i -u hal: \ + -F "file=@../deposit.json;type=application/zip;filename=payload" \ + -F "atom=@../atom-entry.xml;type=application/atom+xml;charset=UTF-8" \ + -H 'In-Progress: false' \ + -H 'Slug: some-external-id' \ + -XPOST https://deposit.softwareheritage.org/1/hal/ + + HTTP/1.0 201 Created + Date: Tue, 26 Sep 2017 10:11:55 GMT + Server: WSGIServer/0.2 CPython/3.5.3 + Vary: Accept, Cookie + Allow: GET, POST, PUT, DELETE, HEAD, OPTIONS + Location: /1/hal/9/metadata/ + X-Frame-Options: SAMEORIGIN + Content-Type: application/xml + + + 9 + Sept. 26, 2017, 10:11 a.m. + payload + deposited + + + + + + + + + + + http://purl.org/net/sword/package/SimpleZip + + +Sample content: + +.. code:: xml + + POST deposit HTTP/1.1 + Host: deposit.softwareheritage.org + Content-Length: [content length] + Content-Type: multipart/related; + boundary="===============1605871705=="; + type="application/atom+xml" + In-Progress: false + MIME-Version: 1.0 + + Media Post + --===============1605871705== + Content-Type: application/atom+xml; charset="utf-8" + Content-Disposition: attachment; name="atom" + MIME-Version: 1.0 + + + + Title + hal-or-other-archive-id + 2005-10-07T17:17:08Z + Contributor + + + The abstract + Access Rights + Alternative Title + Date Available + Bibliographic Citation # noqa + Contributor + Description + Has Part + Has Version + Identifier + Is Part Of + Publisher + References + Rights Holder + Source + Title + Type + + --===============1605871705== + Content-Type: application/zip + Content-Disposition: attachment; name=payload; filename=[filename] + Packaging: http://purl.org/net/sword/package/SimpleZip + Content-MD5: [md5-digest] + MIME-Version: 1.0 + + [...binary package data...] + --===============1605871705==-- + +Deposit Creation - server point of view +--------------------------------------- + +The server receives the request(s) and does minimal checking on the +input prior to any saving operations. 
+ +Validation of the header and body request +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Any kind of errors can happen, here is the list depending on the +situation: + +* common errors: + + * 401 (unauthenticated) if a client does not provide credentials or provides + wrong ones + * 403 (forbidden) if a client tries to access a collection it does not own + * 404 (not found) if a client tries to access an unknown collection + * 404 (not found) if a client tries to access an unknown deposit + * 415 (unsupported media type) if a wrong media type is provided to the + endpoint + +* archive/binary deposit: + + * 413 (request entity too large) if the length of the archive exceeds the + max size configured + * 412 (precondition failed) if the length or hash provided does not match the + reality of the archive. + * 415 (unsupported media type) if a wrong media type is provided + +* multipart deposit: + + * 412 (precondition failed) if the md5 hash provided does not match the + reality of the archive + * 415 (unsupported media type) if a wrong media type is provided + +* Atom entry deposit: + + * 400 (bad request) if the request's body is empty (for creation only) + +[3\|5\|6.2] Server uploads the content in a temporary location +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using an objstorage, the server stores the archive in a temporary +location. It is deemed temporary until the deposit is completed +(status becomes ``deposited``) and the loading finishes. + +The server also persists requests' information in a database. + +[4] Server answers the client +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If everything went well, the server answers either with a 200, 201 or +204 response (depending on the actual endpoint) + +A ``http 200`` response is returned for GET endpoints. + +A ``http 201 Created`` response is returned for POST endpoints. The body +holds the deposit receipt. The headers hold the EDIT-IRI in the +Location header of the response.
+ +A ``http 204 No Content`` response is returned for PUT, DELETE +endpoints. + +If something went wrong, the server answers with one of the `error +status codes and associated messages <#possible-errors>`__ described below. + +[5] Deposit Update +~~~~~~~~~~~~~~~~~~ + +The client previously deposited a ``partial`` document (through an +archive, metadata, or both). The client wants to update information for +that previous deposit (possibly in multiple steps as well). + +The important thing to note here is that, as long as the deposit is in +status ``partial``, the loading did not start. Thus, the client can +update information (replace or add new archive, new metadata, even +delete) for that same ``partial`` deposit. + +When the deposit status changes to ``deposited``, the client can no +longer change the deposit's information (a 403 will be returned in that +case). + +The aggregation of all that deposit's information will later be used +for the actual loading. + +Providing the collection name, and the identifier of the previous +deposit received from the deposit receipt, the client executes a POST +or PUT request on the *update iris*. + +After validation of the body request, the server: + +- uploads such content in a temporary location + +- answers the client with an ``http 204 (No content)``. In the Location header + of the response lies an iri to permit further update. + +- Asynchronously, the server will inject the archive uploaded and the + associated metadata. An operation status endpoint *state iri* permits the + client to query the loading operation status.
+ +Possible update endpoints +^^^^^^^^^^^^^^^^^^^^^^^^^ + +* ``PUT /1/<collection-name>/<deposit-id>/media/``: Replace existing archives + for the deposit +* ``POST /1/<collection-name>/<deposit-id>/media/``: Add new archives to the + deposit +* ``PUT /1/<collection-name>/<deposit-id>/metadata/``: Replace existing + metadata (and possibly archives) +* ``POST /1/<collection-name>/<deposit-id>/metadata/``: Add new metadata + +[6] Deposit Removal +~~~~~~~~~~~~~~~~~~~ + +As long as the deposit's status remains ``partial``, it's possible to +remove the deposit entirely or remove only the deposit's archive(s). + +If the deposit has been removed, further querying that deposit will +return a *404* response. + +If the deposit's archive(s) have been removed, the client can still issue +other requests to update that deposit. + +Operation Status +~~~~~~~~~~~~~~~~ + +Providing a collection name and a deposit id, the client asks the +operation status of a prior deposit. + +URL: GET /1/<collection-name>/<deposit-id>/status/ + +This returns: + +* *200* response with the actual status +* *404* if the deposit does not exist (or no longer does) + +Possible errors +--------------- + +sword:ErrorContent +~~~~~~~~~~~~~~~~~~ + +IRI: ``http://purl.org/net/sword/error/ErrorContent`` + +The supplied format is not the same as that identified in the Packaging +header and/or that supported by the server. + +Associated HTTP status: *415 (Unsupported Media Type)* + +sword:ErrorChecksumMismatch +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +IRI: ``http://purl.org/net/sword/error/ErrorChecksumMismatch`` + +Checksum sent does not match the calculated checksum. + +Associated HTTP status: *412 Precondition Failed* + +sword:ErrorBadRequest +~~~~~~~~~~~~~~~~~~~~~ + +IRI: ``http://purl.org/net/sword/error/ErrorBadRequest`` + +Some parameters sent with the POST/PUT were not understood. + +Associated HTTP status: *400 Bad Request* + +sword:MediationNotAllowed +~~~~~~~~~~~~~~~~~~~~~~~~~ + +IRI: ``http://purl.org/net/sword/error/MediationNotAllowed`` + +Used where a client has attempted a mediated deposit, but this is not +supported by the server.
+ +Associated HTTP status: *412 Precondition Failed* + +sword:MethodNotAllowed +~~~~~~~~~~~~~~~~~~~~~~ + +IRI: ``http://purl.org/net/sword/error/MethodNotAllowed`` + +Used when the client has attempted one of the HTTP update verbs (POST, +PUT, DELETE) but the server has decided not to respond to such requests +on the specified resource at that time. + +Associated HTTP Status: *405 Method Not Allowed* + +sword:MaxUploadSizeExceeded +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +IRI: ``http://purl.org/net/sword/error/MaxUploadSizeExceeded`` + +Used when the client has attempted to supply to the server a file which +exceeds the server's maximum upload size limit + +Associated HTTP Status: *413 (Request Entity Too Large)* + +sword:Unauthorized +~~~~~~~~~~~~~~~~~~ + +IRI: ``http://purl.org/net/sword/error/ErrorUnauthorized`` + +The access to the api is through authentication. + +Associated HTTP status: *401* + +sword:Forbidden +~~~~~~~~~~~~~~~ + +IRI: ``http://purl.org/net/sword/error/ErrorForbidden`` + +The action is forbidden (access to another collection for example). + +Associated HTTP status: *403* + +Nomenclature +------------ + +SWORD uses IRI notion, Internationalized Resource Identifier. In this +chapter, we will describe SWH's IRIs. + +SD-IRI - The Service Document IRI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Service Document IRI. This is the IRI from which the client can +discover its collection IRI. + +HTTP verbs supported: *GET* + +Col-IRI - The Collection IRI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The software collection associated to one user. + +The SWORD Collection IRI is the IRI to which the initial deposit will +take place, and which is listed in the Service Document. + +Following our previous example, this is: +https://deposit.softwareheritage.org/1/hal/. + +HTTP verbs supported: *POST* + +Cont-IRI - The Content IRI +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is the endpoint which permits the client to retrieve +representations of the object as it resides in the SWORD server. 
+ +This will display information about the content and its associated +metadata. + +HTTP verbs supported: *GET* + +*Note:* We also refer to it as *Cont-File-IRI*. + +EM-IRI - The Atom Edit Media IRI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is the endpoint to upload other related archives for the same +deposit. + +It is used to change a ``partial`` deposit with regard to archives, in +particular: + +* replace existing archives with new ones +* add new archives +* delete archives from a deposit + +Example use case: The archive to deposit exceeds the server's upload size +limit. The client can thus split the archive into multiple ones. Post a +first ``partial`` archive to the Col-IRI (with In-Progress: True). Then, in +order to complete the deposit, POST the other remaining archives to the +EM-IRI (the last one with the In-Progress header set to False). + +HTTP verbs supported: *POST*, *PUT*, *DELETE* + +Edit-IRI - The Atom Entry Edit IRI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is the endpoint to change a ``partial`` deposit with regard to +metadata. In particular: + +* replace existing metadata (and archives) with new ones +* add new metadata (and archives) +* delete deposit + +HTTP verbs supported: *POST*, *PUT*, *DELETE* + +*Note:* We also refer to it as *Edit-SE-IRI*. + +SE-IRI - The SWORD Edit IRI +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The sword specification permits merging this with EDIT-IRI, so we did. + +*Note:* We also refer to it as *Edit-SE-IRI*. + +State-IRI - The SWORD Statement IRI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is the IRI which can be used to retrieve a description of the +object from the sword server, including the structure of the object and +its state. This will be used as the operation status endpoint.
+ +HTTP verbs supported: *GET* + +Sources +------- + +* `SWORD v2 specification + `__ +* `arxiv documentation `__ +* `Dataverse example `__ +* `SWORD used on HAL `__ +* `xml examples for CCSD `__ diff --git a/docs/spec-loading.md b/docs/spec-loading.md deleted file mode 100644 index ef8780eb..00000000 --- a/docs/spec-loading.md +++ /dev/null @@ -1,221 +0,0 @@ -# Loading specification (draft) - -This part discusses the deposit loading part on the server side. - -## Tarball Loading - -The `swh-loader-tar` module is already able to inject tarballs in swh -with very limited metadata (mainly the origin). - -The loading of the deposit will use the deposit's associated data: -- the metadata -- the archive(s) - -We will use the `synthetic` revision notion. - -To that revision will be associated the metadata. Those will be -included in the hash computation, thus resulting in a unique -identifier. - -### Loading mapping - -Some of those metadata will also be included in the `origin_metadata` -table. - -``` -origin | https://hal.inria.fr/hal-id | -------------------------------------|----------------------------------------| -origin_visit | 1 :reception_date | -origin_metadata | aggregated metadata | -occurrence & occurrence_history | branch: client's version n° (e.g hal) | -revision | synthetic_revision (tarball) | -directory | upper level of the uncompressed archive| -``` - -### Questions raised concerning loading - -- A deposit has one origin, yet an origin can have multiple deposits? - -No, an origin can have multiple requests for the same deposit. -Which should end up in one single deposit (when the client pushes its final -request saying deposit 'done' through the header In-Progress). - -Only update of existing 'partial' deposit is permitted. -Other than that, the deposit 'update' operation. - -To create a new version of a software (already deposited), the client -must prior to this create a new deposit. 
- - -Illustration First deposit loading: - -HAL's deposit 01535619 = SWH's deposit **01535619-1** - - + 1 origin with url:https://hal.inria.fr/medihal-01535619 - - + 1 synthetic revision - - + 1 directory - -HAL's update on deposit 01535619 = SWH's deposit **01535619-2** - -(*with HAL updates can only be on the metadata and a new version is required -if the content changes) - - + 1 origin with url:https://hal.inria.fr/medihal-01535619 - - + new synthetic revision (with new metadata) - - + same directory - -HAL's deposit 01535619-v2 = SWH's deposit **01535619-v2-1** - - + same origin - - + new revision - - + new directory - - - -## Technical details - -### Requirements - -- one dedicated database to store the deposit's state - swh-deposit - -- one dedicated temporary objstorage to store archives before - loading - -- one client to test the communication with SWORD protocol - -### Deposit reception schema - -- SWORD imposes the use of basic authentication, so we need a way to -authenticate client. Also, a client can access collections: - -**deposit_client** table: - - id (bigint): Client's identifier - - username (str): Client's username - - password (pass): Client's crypted password - - collections ([id]): List of collections the client can access - -- Collections group deposits together: - -**deposit_collection** table: - - id (bigint): Collection's identifier - - name (str): Collection's human readable name - -- A deposit is the main object the repository is all about: - -**deposit** table: - - id (bigint): deposit's identifier - - reception_date (date): First deposit's reception date - - complete_data (date): Date when the deposit is deemed complete and ready for loading - - collection (id): The collection the deposit belongs to - - external id (text): client's internal identifier (e.g hal's id, etc...). 
- - client_id (id) : Client which did the deposit - - swh_id (str) : swh identifier result once the loading is complete - - status (enum): The deposit's current status - -- As mentioned, a deposit can have a status, whose possible values - are: - -``` text - 'partial', -- the deposit is new or partially received since it - -- can be done in multiple requests - 'expired', -- deposit has been there too long and is now deemed - -- ready to be garbage collected - 'deposited' -- deposit complete, it is ready to be checked to ensure data consistency - 'verified', -- deposit is fully received, checked, and ready for loading - 'loading', -- loading is ongoing on swh's side - 'done', -- loading is successful - 'failed' -- loading is a failure -``` - -A deposit is stateful and can be made in multiple requests: - -**deposit_request** table: - - id (bigint): identifier - - type (id): deposit request's type (possible values: 'archive', 'metadata') - - deposit_id (id): deposit whose request belongs to - - metadata: metadata associated to the request - - date (date): date of the requests - -Information sent along a request are stored in a `deposit_request` -row. - -They can be either of type `metadata` (atom entry, multipart's atom -entry part) or of type `archive` (binary upload, multipart's binary -upload part). - -When the deposit is complete (status `deposited`), those `metadata` -and `archive` deposit requests will be read and aggregated. They will -then be sent as parameters to the loading routine. - -During loading, some of those metadata are kept in the -`origin_metadata` table and some other are stored in the `revision` -table (see [metadata loading](#metadata-loading)). 
- -The only update actions occurring on the deposit table are in regards -of: -- status changing: - - `partial` -> {`expired`/`deposited`}, - - `deposited` -> {`rejected`/`verified`}, - - `verified` -> `loading` - - `loading` -> {`done`/`failed`} -- `complete_date` when the deposit is finalized (when the status is - changed to `deposited`) -- `swh-id` is populated once we have the loading result - -#### SWH Identifier returned - - The synthetic revision id - - e.g: 47dc6b4636c7f6cba0df83e3d5490bf4334d987e - -### Scheduling loading - -All `archive` and `metadata` deposit requests should be aggregated -before loading. - -The loading should be scheduled via the scheduler's api. - -Only `deposited` deposit are concerned by the loading. - -When the loading is done and successful, the deposit entry is -updated: -- `status` is updated to `done` -- `swh-id` is populated with the resulting hash - (cf. [swh identifier](#swh-identifier-returned)) -- `complete_date` is updated to the loading's finished time - -When the loading is failed, the deposit entry is updated: -- `status` is updated to `failed` -- `swh-id` and `complete_data` remains as is - -*Note:* As a further improvement, we may prefer having a retry policy -with graceful delays for further scheduling. - -### Metadata loading - -- the metadata received with the deposit should be kept in the -`origin_metadata` table before translation as part of the loading -process and an indexation process should be scheduled. 
- -- provider_id and tool_id are resolved by the prepare_metadata method in the -loader-core - -- the origin_metadata entry is sent to storage by the send_origin_metadata in -the loader-core - - -origin_metadata table: -``` -id bigint PK -origin bigint -discovery_date date -provider_id bigint FK // (from provider table) -tool_id bigint FK // indexer_configuration_id tool used for extraction -metadata jsonb // before translation -``` diff --git a/docs/spec-loading.rst b/docs/spec-loading.rst new file mode 100644 index 00000000..21c7a0f1 --- /dev/null +++ b/docs/spec-loading.rst @@ -0,0 +1,222 @@ +Loading specification (draft) +============================= + +This part discusses the deposit loading part on the server side. + +Tarball Loading +--------------- + +The ``swh-loader-tar`` module is already able to inject tarballs in swh +with very limited metadata (mainly the origin). + +The loading of the deposit will use the deposit's associated data: + +* the metadata +* the archive(s) + +We will use the ``synthetic`` revision notion. + +To that revision will be associated the metadata. Those will be included +in the hash computation, thus resulting in a unique identifier. + +Loading mapping +~~~~~~~~~~~~~~~ + +Some of those metadata will also be included in the ``origin_metadata`` +table. + +:: + + origin | https://hal.inria.fr/hal-id | + ------------------------------------|----------------------------------------| + origin_visit | 1 :reception_date | + origin_metadata | aggregated metadata | + occurrence & occurrence_history | branch: client's version n° (e.g hal) | + revision | synthetic_revision (tarball) | + directory | upper level of the uncompressed archive| + +Questions raised concerning loading +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- A deposit has one origin, yet an origin can have multiple deposits? + +No, an origin can have multiple requests for the same deposit. 
Which +should end up in one single deposit (when the client pushes its final +request saying deposit 'done' through the header In-Progress). + +Only update of existing 'partial' deposit is permitted. Other than that, +the deposit 'update' operation. + +To create a new version of a software (already deposited), the client +must prior to this create a new deposit. + +Illustration First deposit loading: + +HAL's deposit 01535619 = SWH's deposit **01535619-1** + +:: + + + 1 origin with url:https://hal.inria.fr/medihal-01535619 + + + 1 synthetic revision + + + 1 directory + +HAL's update on deposit 01535619 = SWH's deposit **01535619-2** + +(\*with HAL updates can only be on the metadata and a new version is +required if the content changes) + +:: + + + 1 origin with url:https://hal.inria.fr/medihal-01535619 + + + new synthetic revision (with new metadata) + + + same directory + +HAL's deposit 01535619-v2 = SWH's deposit **01535619-v2-1** + +:: + + + same origin + + + new revision + + + new directory + +Technical details +----------------- + +Requirements +~~~~~~~~~~~~ + +* one dedicated database to store the deposit's state - swh-deposit +* one dedicated temporary objstorage to store archives before loading +* one client to test the communication with SWORD protocol + +Deposit reception schema +~~~~~~~~~~~~~~~~~~~~~~~~ + +* SWORD imposes the use of basic authentication, so we need a way to + authenticate client. 
Also, a client can access collections: + + **deposit\_client** table: + + * id (bigint): Client's identifier + * username (str): Client's username + * password (pass): Client's encrypted password + * collections ([id]): List of collections the client can access + +* Collections group deposits together: + + **deposit\_collection** table: + + * id (bigint): Collection's identifier + * name (str): Collection's human readable name + +* A deposit is the main object the repository is all about: + + **deposit** table: + + * id (bigint): deposit's identifier + * reception\_date (date): First deposit's reception date + * complete\_date (date): Date when the deposit is deemed complete and ready + for loading + * collection (id): The collection the deposit belongs to + * external id (text): client's internal identifier (e.g. hal's id, etc...). + * client\_id (id) : Client which did the deposit + * swh\_id (str) : swh identifier result once the loading is complete + * status (enum): The deposit's current status + +* As mentioned, a deposit can have a status, whose possible values are: + + ..
code:: text + + 'partial', -- the deposit is new or partially received since it + -- can be done in multiple requests + 'expired', -- deposit has been there too long and is now deemed + -- ready to be garbage collected + 'deposited', -- deposit complete, it is ready to be checked to ensure data consistency + 'verified', -- deposit is fully received, checked, and ready for loading + 'loading', -- loading is ongoing on swh's side + 'done', -- loading is successful + 'failed' -- loading is a failure + +* A deposit is stateful and can be made in multiple requests: + + **deposit\_request** table: + + * id (bigint): identifier + * type (id): deposit request's type (possible values: 'archive', 'metadata') + * deposit\_id (id): deposit the request belongs to + * metadata: metadata associated to the request + * date (date): date of the request + + Information sent along with a request is stored in a ``deposit_request`` row. + + They can be either of type ``metadata`` (atom entry, multipart's atom entry + part) or of type ``archive`` (binary upload, multipart's binary upload part). + + When the deposit is complete (status ``deposited``), those ``metadata`` and + ``archive`` deposit requests will be read and aggregated. They will then be + sent as parameters to the loading routine. + + During loading, some of those metadata are kept in the ``origin_metadata`` + table and some others are stored in the ``revision`` table (see `metadata + loading <#metadata-loading>`__).
+ + The only update actions occurring on the deposit table concern: + + * status changes: + + * ``partial`` -> {``expired``/``deposited``}, + * ``deposited`` -> {``rejected``/``verified``}, + * ``verified`` -> ``loading``, + * ``loading`` -> {``done``/``failed``} + + * ``complete_date`` when the deposit is finalized (when the status is + changed to ``deposited``) + * ``swh-id`` is populated once we have the loading result + +SWH Identifier returned +^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + The synthetic revision id + + e.g.: swh:1:rev:47dc6b4636c7f6cba0df83e3d5490bf4334d987e + +Scheduling loading +~~~~~~~~~~~~~~~~~~ + +All ``archive`` and ``metadata`` deposit requests should be aggregated before +loading. + +The loading should be scheduled via the scheduler's api. + +Only ``deposited`` deposits are concerned by the loading. + +When the loading is done and successful, the deposit entry is updated: + +* ``status`` is updated to ``done`` +* ``swh-id`` is populated with the resulting hash (cf. `swh identifier + <#swh-identifier-returned>`__) +* ``complete_date`` is updated to the loading's finished time + +When the loading has failed, the deposit entry is updated: + +* ``status`` is updated to ``failed`` +* ``swh-id`` and ``complete_date`` remain as is + +*Note:* As a further improvement, we may prefer having a retry policy with +graceful delays for further scheduling. + +Metadata loading +~~~~~~~~~~~~~~~~ + +- the metadata received with the deposit should be kept in the + ``origin_metadata`` table before translation as part of the loading process + and an indexation process should be scheduled.
+ +- provider\_id and tool\_id are resolved by the prepare\_metadata method in the + loader-core + +- the origin\_metadata entry is sent to storage by the send\_origin\_metadata + in the loader-core + +origin\_metadata table: + +:: + + id bigint PK + origin bigint + discovery_date date + provider_id bigint FK // (from provider table) + tool_id bigint FK // indexer_configuration_id tool used for extraction + metadata jsonb // before translation diff --git a/docs/sys-info.md b/docs/sys-info.md deleted file mode 100644 index 25ab8dca..00000000 --- a/docs/sys-info.md +++ /dev/null @@ -1,47 +0,0 @@ -# Bootstrap swh-deposit on production - -As usual, the debian packaged is created and uploaded to the swh -debian repository. Once the package is installed, we need to do a few -things in regards to the database. - -## Prepare the database setup (existence, connection, etc...). - -This is defined through the packaged `swh.deposit.settings.production` -module and the expected **/etc/softwareheritage/deposit/private.yml**. - -As usual, the expected configuration files are deployed through our -puppet manifest (cf. puppet-environment/swh-site, -puppet-environment/swh-role, puppet-environment/swh-profile) - -## Migrate/bootstrap the db schema - -``` Shell -sudo django-admin migrate --settings=swh.deposit.settings.production -``` - -## Load minimum defaults data - -``` Shell -sudo django-admin loaddata --settings=swh.deposit.settings.production deposit_data -``` - -This adds the minimal: -- deposit request type 'archive' and 'metadata' -- 'hal' collection - -Note: swh.deposit.fixtures.deposit_data is packaged - -## Add client and collection - -``` Shell -python3 -m swh.deposit.create_user --platform production \ - --collection \ - --username \ - --password -``` - -This adds a user `` which can access the collection -``. The password will be used for the authentication -access to the deposit api. - -Note: This creation procedure needs to be improved. 
diff --git a/docs/sys-info.rst b/docs/sys-info.rst new file mode 100644 index 00000000..582fbc7c --- /dev/null +++ b/docs/sys-info.rst @@ -0,0 +1,51 @@ +Deployment of the swh-deposit +============================= + +As usual, the debian package is created and uploaded to the swh debian +repository. Once the package is installed, we need to do a few things +with regard to the database. + +Prepare the database setup (existence, connection, etc...). +----------------------------------------------------------- + +This is defined through the packaged ``swh.deposit.settings.production`` +module and the expected **/etc/softwareheritage/deposit/private.yml**. + +As usual, the expected configuration files are deployed through our +puppet manifest (cf. puppet-environment/swh-site, +puppet-environment/swh-role, puppet-environment/swh-profile) + +Migrate/bootstrap the db schema +------------------------------- + +.. code:: shell + + sudo django-admin migrate --settings=swh.deposit.settings.production + +Load minimum default data +-------------------------- + +.. code:: shell + + sudo django-admin loaddata --settings=swh.deposit.settings.production deposit_data + +This adds the minimal data: the deposit request types 'archive' and +'metadata', and the 'hal' collection. + +Note: swh.deposit.fixtures.deposit\_data is packaged + +Add client and collection +------------------------- + +.. code:: shell + + python3 -m swh.deposit.create_user --platform production \ + --collection <collection-name> \ + --username <client-name> \ + --password <password> + +This adds a user ``<client-name>`` which can access the collection +``<collection-name>``. The password will be used for the authentication +access to the deposit api. + +Note: This creation procedure needs to be improved.
diff --git a/requirements.txt b/requirements.txt index ac9fbfe5..0d52e480 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ vcversioner click Django djangorestframework djangorestframework-xml +lxml diff --git a/setup.py b/setup.py index 7dabf5d6..cfbbc0ca 100644 --- a/setup.py +++ b/setup.py @@ -1,33 +1,29 @@ from setuptools import setup, find_packages def parse_requirements(): requirements = [] for reqf in ('requirements.txt', 'requirements-swh.txt'): with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith('#'): continue requirements.append(line) return requirements setup( name='swh.deposit', description='Software Heritage Deposit Server', author='Software Heritage developers', author_email='swh-devel@inria.fr', url='https://forge.softwareheritage.org/source/swh-deposit/', packages=find_packages(), - scripts=[], # scripts to package + scripts=['bin/swh-deposit'], # scripts to package install_requires=parse_requirements(), - extras_require={ - 'loader': ['swh.loader.core >= 0.0.25', - 'swh.scheduler >= 0.0.19', - 'requests'], - }, + extras_require={}, setup_requires=['vcversioner'], vcversioner={}, include_package_data=True, ) diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO index 96b7393e..69b6a6b6 100644 --- a/swh.deposit.egg-info/PKG-INFO +++ b/swh.deposit.egg-info/PKG-INFO @@ -1,11 +1,10 @@ -Metadata-Version: 2.1 +Metadata-Version: 1.0 Name: swh.deposit -Version: 0.0.43 +Version: 0.0.44 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN -Provides-Extra: loader diff --git a/swh.deposit.egg-info/SOURCES.txt b/swh.deposit.egg-info/SOURCES.txt index b0b158e4..637d96ae 100644 --- a/swh.deposit.egg-info/SOURCES.txt +++ b/swh.deposit.egg-info/SOURCES.txt @@ -1,143 +1,146 @@ .gitignore AUTHORS LICENSE 
MANIFEST.in Makefile Makefile.local README.md requirements-swh.txt requirements.txt setup.py version.txt bin/Makefile bin/content.sh bin/create_deposit.sh bin/create_deposit_atom.sh bin/create_deposit_with_metadata.sh bin/default-setup bin/download-deposit-archive.sh bin/home.sh bin/replace-deposit-archive.sh bin/service-document.sh bin/status.sh +bin/swh-deposit bin/update-deposit-with-another-archive.sh bin/update-status.sh debian/changelog debian/compat debian/control debian/copyright debian/rules debian/source/format docs/.gitignore docs/Makefile +docs/blueprint.rst docs/conf.py -docs/dev-info.md -docs/getting-started.md +docs/dev-info.rst +docs/getting-started.rst docs/index.rst -docs/metadata.md -docs/spec-api.md -docs/spec-loading.md -docs/sys-info.md +docs/metadata.rst +docs/spec-api.rst +docs/spec-loading.rst +docs/sys-info.rst docs/_static/.placeholder docs/_templates/.placeholder docs/images/deposit-create-chart.png docs/images/deposit-delete-chart.png docs/images/deposit-update-chart.png resources/deposit/server.yml swh/__init__.py swh/manage.py swh.deposit.egg-info/PKG-INFO swh.deposit.egg-info/SOURCES.txt swh.deposit.egg-info/dependency_links.txt swh.deposit.egg-info/requires.txt swh.deposit.egg-info/top_level.txt swh/deposit/__init__.py swh/deposit/apps.py swh/deposit/auth.py swh/deposit/config.py swh/deposit/create_user.py swh/deposit/errors.py swh/deposit/models.py swh/deposit/parsers.py swh/deposit/signals.py swh/deposit/urls.py swh/deposit/wsgi.py swh/deposit/api/__init__.py swh/deposit/api/common.py swh/deposit/api/deposit.py swh/deposit/api/deposit_content.py swh/deposit/api/deposit_status.py swh/deposit/api/deposit_update.py swh/deposit/api/service_document.py swh/deposit/api/urls.py swh/deposit/api/private/__init__.py swh/deposit/api/private/deposit_check.py swh/deposit/api/private/deposit_read.py swh/deposit/api/private/deposit_update_status.py swh/deposit/api/private/urls.py +swh/deposit/client/__init__.py +swh/deposit/client/cli.py 
swh/deposit/fixtures/__init__.py swh/deposit/fixtures/deposit_data.yaml swh/deposit/loader/__init__.py swh/deposit/loader/checker.py -swh/deposit/loader/client.py swh/deposit/loader/loader.py swh/deposit/loader/scheduler.py swh/deposit/loader/tasks.py swh/deposit/migrations/0001_initial.py swh/deposit/migrations/0002_depositrequest_archive.py swh/deposit/migrations/0003_temporaryarchive.py swh/deposit/migrations/0004_delete_temporaryarchive.py swh/deposit/migrations/0005_auto_20171019_1436.py swh/deposit/migrations/0006_depositclient_url.py swh/deposit/migrations/0007_auto_20171129_1609.py swh/deposit/migrations/0008_auto_20171130_1513.py swh/deposit/migrations/0009_deposit_parent.py swh/deposit/migrations/0010_auto_20180110_0953.py swh/deposit/migrations/0011_auto_20180115_1510.py swh/deposit/migrations/__init__.py swh/deposit/settings/__init__.py swh/deposit/settings/common.py swh/deposit/settings/development.py swh/deposit/settings/production.py swh/deposit/settings/testing.py swh/deposit/static/robots.txt swh/deposit/static/css/bootstrap-responsive.min.css swh/deposit/static/css/style.css swh/deposit/static/img/arrow-up-small.png swh/deposit/static/img/swh-logo-deposit.png swh/deposit/static/img/swh-logo-deposit.svg swh/deposit/static/img/icons/swh-logo-32x32.png swh/deposit/static/img/icons/swh-logo-deposit-180x180.png swh/deposit/static/img/icons/swh-logo-deposit-192x192.png swh/deposit/static/img/icons/swh-logo-deposit-270x270.png swh/deposit/templates/__init__.py swh/deposit/templates/homepage.html swh/deposit/templates/layout.html swh/deposit/templates/deposit/__init__.py swh/deposit/templates/deposit/content.xml swh/deposit/templates/deposit/deposit_receipt.xml swh/deposit/templates/deposit/error.xml swh/deposit/templates/deposit/service_document.xml swh/deposit/templates/deposit/status.xml swh/deposit/templates/rest_framework/api.html swh/deposit/tests/__init__.py swh/deposit/tests/common.py swh/deposit/tests/api/__init__.py 
swh/deposit/tests/api/test_common.py swh/deposit/tests/api/test_deposit.py swh/deposit/tests/api/test_deposit_atom.py swh/deposit/tests/api/test_deposit_binary.py swh/deposit/tests/api/test_deposit_check.py swh/deposit/tests/api/test_deposit_delete.py swh/deposit/tests/api/test_deposit_multipart.py swh/deposit/tests/api/test_deposit_read_archive.py swh/deposit/tests/api/test_deposit_read_metadata.py swh/deposit/tests/api/test_deposit_status.py swh/deposit/tests/api/test_deposit_update.py swh/deposit/tests/api/test_deposit_update_status.py swh/deposit/tests/api/test_service_document.py swh/deposit/tests/loader/__init__.py swh/deposit/tests/loader/common.py swh/deposit/tests/loader/test_checker.py swh/deposit/tests/loader/test_client.py swh/deposit/tests/loader/test_loader.py \ No newline at end of file diff --git a/swh.deposit.egg-info/requires.txt b/swh.deposit.egg-info/requires.txt index 27956dd7..d7707788 100644 --- a/swh.deposit.egg-info/requires.txt +++ b/swh.deposit.egg-info/requires.txt @@ -1,15 +1,11 @@ Django click djangorestframework djangorestframework-xml +lxml swh.core>=0.0.36 swh.loader.core>=0.0.27 swh.loader.tar>=0.0.32 swh.model>=0.0.21 swh.scheduler>=0.0.19 vcversioner - -[loader] -requests -swh.loader.core>=0.0.25 -swh.scheduler>=0.0.19 diff --git a/swh/deposit/api/deposit_status.py b/swh/deposit/api/deposit_status.py index dbd1a36d..c75ffea6 100644 --- a/swh/deposit/api/deposit_status.py +++ b/swh/deposit/api/deposit_status.py @@ -1,50 +1,51 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render from rest_framework import status from .common import SWHBaseDeposit from ..errors import NOT_FOUND, make_error_response from ..errors import make_error_response_from_dict from ..models import DEPOSIT_STATUS_DETAIL, Deposit class 
SWHDepositStatus(SWHBaseDeposit): """Deposit status. What's known as 'State IRI' in the sword specification. HTTP verbs supported: GET """ def get(self, req, collection_name, deposit_id, format=None): checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) try: deposit = Deposit.objects.get(pk=deposit_id) if deposit.collection.name != collection_name: raise Deposit.DoesNotExist except Deposit.DoesNotExist: return make_error_response( req, NOT_FOUND, 'deposit %s does not belong to collection %s' % ( deposit_id, collection_name)) context = { 'deposit_id': deposit.id, 'status': deposit.status, 'status_detail': DEPOSIT_STATUS_DETAIL[deposit.status], + 'swh_id': None, } if deposit.swh_id: context['swh_id'] = deposit.swh_id return render(req, 'deposit/status.xml', context=context, content_type='application/xml', status=status.HTTP_200_OK) diff --git a/swh/deposit/api/deposit_update.py b/swh/deposit/api/deposit_update.py index 7c2a210b..4a0d5975 100644 --- a/swh/deposit/api/deposit_update.py +++ b/swh/deposit/api/deposit_update.py @@ -1,158 +1,155 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from rest_framework import status from .common import SWHPostDepositAPI, SWHPutDepositAPI, SWHDeleteDepositAPI from .common import ACCEPT_ARCHIVE_CONTENT_TYPES from ..config import CONT_FILE_IRI, EDIT_SE_IRI, EM_IRI from ..errors import make_error_response, BAD_REQUEST from ..parsers import SWHFileUploadZipParser, SWHFileUploadTarParser from ..parsers import SWHAtomEntryParser from ..parsers import SWHMultiPartParser class SWHUpdateArchiveDeposit(SWHPostDepositAPI, SWHPutDepositAPI, SWHDeleteDepositAPI): """Deposit request class defining api endpoints for sword deposit. 
What's known as 'EM IRI' in the sword specification. HTTP verbs supported: PUT, POST, DELETE """ parser_classes = (SWHFileUploadZipParser, SWHFileUploadTarParser, ) def process_put(self, req, headers, collection_name, deposit_id): """Replace existing content for the existing deposit. - +header: Metadata-relevant (to extract metadata from the archive) - source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_editingcontent_binary Returns: 204 No content """ if req.content_type not in ACCEPT_ARCHIVE_CONTENT_TYPES: msg = 'Packaging format supported is restricted to %s' % ( ', '.join(ACCEPT_ARCHIVE_CONTENT_TYPES)) return make_error_response(req, BAD_REQUEST, msg) return self._binary_upload(req, headers, collection_name, deposit_id=deposit_id, replace_archives=True) def process_post(self, req, headers, collection_name, deposit_id): """Add new content to the existing deposit. - +header: Metadata-relevant (to extract metadata from the archive) - source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_addingcontent_mediaresource Returns: 201 Created Headers: Location: [Cont-File-IRI] Body: [optional Deposit Receipt] """ - if req.content_type != 'application/zip': - return make_error_response(req, BAD_REQUEST, - 'Only application/zip is supported!') + if req.content_type not in ACCEPT_ARCHIVE_CONTENT_TYPES: + msg = 'Packaging format supported is restricted to %s' % ( + ', '.join(ACCEPT_ARCHIVE_CONTENT_TYPES)) + return make_error_response(req, BAD_REQUEST, msg) return (status.HTTP_201_CREATED, CONT_FILE_IRI, self._binary_upload(req, headers, collection_name, deposit_id)) def process_delete(self, req, collection_name, deposit_id): """Delete content (archives) from existing deposit. 
source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_deletingcontent Returns: 204 Created """ return self._delete_archives(collection_name, deposit_id) class SWHUpdateMetadataDeposit(SWHPostDepositAPI, SWHPutDepositAPI, SWHDeleteDepositAPI): """Deposit request class defining api endpoints for sword deposit. What's known as 'Edit IRI' (and SE IRI) in the sword specification. HTTP verbs supported: POST (SE IRI), PUT (Edit IRI), DELETE """ parser_classes = (SWHMultiPartParser, SWHAtomEntryParser) def process_put(self, req, headers, collection_name, deposit_id): """Replace existing deposit's metadata/archive with new ones. source: - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_editingcontent_metadata - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_editingcontent_multipart Returns: 204 No content """ if req.content_type.startswith('multipart/'): return self._multipart_upload(req, headers, collection_name, deposit_id=deposit_id, replace_archives=True, replace_metadata=True) return self._atom_entry(req, headers, collection_name, deposit_id=deposit_id, replace_metadata=True) def process_post(self, req, headers, collection_name, deposit_id): """Add new metadata/archive to existing deposit. source: - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_addingcontent_metadata - http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_addingcontent_multipart This also deals with an empty post corner case to finalize a deposit. Returns: In optimal case for a multipart and atom-entry update, a 201 Created response. The body response will hold a deposit. And the response headers will contain an entry 'Location' with the EM-IRI. For the empty post case, this returns a 200. 
""" if req.content_type.startswith('multipart/'): return (status.HTTP_201_CREATED, EM_IRI, self._multipart_upload(req, headers, collection_name, deposit_id=deposit_id)) # check for final empty post # source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html # #continueddeposit_complete if headers['content-length'] == 0 and headers['in-progress'] is False: data = self._empty_post(req, headers, collection_name, deposit_id) return (status.HTTP_200_OK, EDIT_SE_IRI, data) return (status.HTTP_201_CREATED, EM_IRI, self._atom_entry(req, headers, collection_name, deposit_id=deposit_id)) def process_delete(self, req, collection_name, deposit_id): """Delete the container (deposit). Source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html #protocoloperations_deleteconteiner """ return self._delete_deposit(collection_name, deposit_id) diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py index a142069c..9a44ab4c 100644 --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -1,183 +1,183 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import patoolib from rest_framework import status from ..common import SWHGetDepositAPI, SWHPrivateAPIView from ...config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED from ...config import ARCHIVE_TYPE, METADATA_TYPE from ...models import Deposit, DepositRequest class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView): """Dedicated class to read a deposit's raw archives content. Only GET is supported. 
""" def _deposit_requests(self, deposit, request_type): """Given a deposit, yields its associated deposit_request Args: deposit (Deposit): Deposit to list requests for request_type (str): Archive or metadata type Yields: deposit requests of type request_type associated to the deposit """ deposit_requests = DepositRequest.objects.filter( type=self.deposit_request_types[request_type], deposit=deposit).order_by('id') for deposit_request in deposit_requests: yield deposit_request def _check_deposit_archives(self, deposit): """Given a deposit, check each deposit request of type archive. Args: The deposit to check archives for Returns True if all archives are ok, False otherwise. """ requests = list(self._deposit_requests( deposit, request_type=ARCHIVE_TYPE)) if len(requests) == 0: # no associated archive is refused return False for dr in requests: check = self._check_archive(dr.archive.path) if not check: return False return True def _check_archive(self, archive_path): """Check that a given archive is actually ok for reading. Args: archive_path (str): Archive to check Returns: True if archive is successfully read, False otherwise. """ try: patoolib.test_archive(archive_path, verbosity=-1) - except: + except Exception: return False else: return True def _metadata_get(self, deposit): """Given a deposit, aggregate all metadata requests. Args: The deposit to check metadata for. Returns: True if the deposit's associated metadata are ok, False otherwise. """ metadata = {} for dr in self._deposit_requests(deposit, request_type=METADATA_TYPE): metadata.update(dr.metadata) return metadata def _check_metadata(self, metadata): """Check to execute on all metadata for mandatory field presence. Args: metadata (dict): Metadata to actually check Returns: True if metadata is ok, False otherwise. 
""" required_fields = (('url',), ('external_identifier',), ('name', 'title'), ('author',)) result = all(any(name in field for field in metadata for name in possible_names) for possible_names in required_fields) return result def _check_url(self, client_domain, metadata): """Check compatibility between client_domain and url field in metadata Args: client_domain (str): url associated with the deposit's client metadata (dict): Metadata where to find url Returns: True if url is ok, False otherwise. """ metadata_urls = [] for field in metadata: if 'url' in field: metadata_urls.append(metadata[field]) return any(client_domain in url for url in metadata_urls) def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ deposit = Deposit.objects.get(pk=deposit_id) client_domain = deposit.client.domain metadata = self._metadata_get(deposit) problems = [] # will check each deposit's associated request (both of type # archive and metadata) for errors archives_status = self._check_deposit_archives(deposit) if not archives_status: problems.append('archive(s)') metadata_status = self._check_metadata(metadata) if not metadata_status: problems.append('metadata') url_status = self._check_url(client_domain, metadata) if not url_status: problems.append('url') deposit_status = archives_status and metadata_status and url_status # if any problems arose, the deposit is rejected if not deposit_status: deposit.status = DEPOSIT_STATUS_REJECTED response = { 'status': deposit.status, 'details': 'Some %s failed the checks.' 
% ( ' and '.join(problems), ), } else: deposit.status = DEPOSIT_STATUS_VERIFIED response = { 'status': deposit.status, } deposit.save() return status.HTTP_200_OK, json.dumps(response), 'application/json' diff --git a/swh/deposit/api/service_document.py b/swh/deposit/api/service_document.py index 9ecac083..0b04103a 100644 --- a/swh/deposit/api/service_document.py +++ b/swh/deposit/api/service_document.py @@ -1,29 +1,33 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render +from django.core.urlresolvers import reverse from .common import SWHBaseDeposit, ACCEPT_PACKAGINGS from .common import ACCEPT_ARCHIVE_CONTENT_TYPES +from ..config import COL_IRI from ..models import DepositClient, DepositCollection class SWHServiceDocument(SWHBaseDeposit): def get(self, req, *args, **kwargs): client = DepositClient.objects.get(username=req.user) - collections = [] + collections = {} + for col_id in client.collections: col = DepositCollection.objects.get(pk=col_id) - collections.append(col) + col_uri = req.build_absolute_uri(reverse(COL_IRI, args=[col.name])) + collections[col.name] = col_uri context = { 'max_upload_size': self.config['max_upload_size'], 'accept_packagings': ACCEPT_PACKAGINGS, 'accept_content_types': ACCEPT_ARCHIVE_CONTENT_TYPES, 'collections': collections, } return render(req, 'deposit/service_document.xml', context, content_type='application/xml') diff --git a/swh/deposit/client/__init__.py b/swh/deposit/client/__init__.py new file mode 100644 index 00000000..363ebd50 --- /dev/null +++ b/swh/deposit/client/__init__.py @@ -0,0 +1,591 @@ +# Copyright (C) 2017-2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later 
version +# See top-level LICENSE file for more information + +"""Module in charge of defining an swh-deposit client + +""" + +import hashlib +import os +import requests + +from abc import ABCMeta, abstractmethod +from lxml import etree + +from swh.core.config import SWHConfig + + +class BaseApiDepositClient(SWHConfig): + """Deposit client base class + + """ + CONFIG_BASE_FILENAME = 'deposit/client' + DEFAULT_CONFIG = { + 'url': ('str', 'http://localhost:5006'), + 'auth': ('dict', {}), # with optional 'username'/'password' keys + } + + def __init__(self, config=None, _client=requests): + super().__init__() + if config is None: + self.config = super().parse_config_file() + else: + self.config = config + + self._client = _client + self.base_url = self.config['url'] + auth = self.config['auth'] + if auth == {}: + self.auth = None + else: + self.auth = (auth['username'], auth['password']) + + def do(self, method, url, *args, **kwargs): + """Internal method to deal with requests, possibly with basic http + authentication. + + Args: + method (str): supported http methods as in self._methods' keys + + Returns: + The request's execution + + """ + if hasattr(self._client, method): + method_fn = getattr(self._client, method) + else: + raise ValueError('Development error, unsupported method %s' % ( + method)) + + if self.auth: + kwargs['auth'] = self.auth + + full_url = '%s%s' % (self.base_url.rstrip('/'), url) + return method_fn(full_url, *args, **kwargs) + + +class ApiDepositClient(BaseApiDepositClient): + """Private API deposit client to: + + - read a given deposit's archive(s) + - read a given deposit's metadata + - update a given deposit's status + + """ + def archive_get(self, archive_update_url, archive_path, log=None): + """Retrieve the archive from the deposit to a local directory. 
+ + Args: + archive_update_url (str): The full deposit archive(s)'s raw content + to retrieve locally + + archive_path (str): the local archive's path where to store + the raw content + + Returns: + The archive path to the local archive to load. + Or None if any problem arose. + + """ + r = self.do('get', archive_update_url, stream=True) + if r.ok: + with open(archive_path, 'wb') as f: + for chunk in r.iter_content(): + f.write(chunk) + + return archive_path + + msg = 'Problem when retrieving deposit archive at %s' % ( + archive_update_url, ) + if log: + log.error(msg) + + raise ValueError(msg) + + def metadata_get(self, metadata_url, log=None): + """Retrieve the metadata information on a given deposit. + + Args: + metadata_url (str): The full deposit metadata url to retrieve + locally + + Returns: + The dictionary of metadata for that deposit or None if any + problem arose. + + """ + r = self.do('get', metadata_url) + if r.ok: + return r.json() + + msg = 'Problem when retrieving metadata at %s' % metadata_url + if log: + log.error(msg) + + raise ValueError(msg) + + def status_update(self, update_status_url, status, + revision_id=None): + """Update the deposit's status. 
+ + Args: + update_status_url (str): the full deposit's archive + status (str): The status to update the deposit with + revision_id (str/None): the revision's identifier to update to + + """ + payload = {'status': status} + if revision_id: + payload['revision_id'] = revision_id + + self.do('put', update_status_url, json=payload) + + def check(self, check_url, log=None): + """Check the deposit's associated data (metadata, archive(s)) + + Args: + check_url (str): the full deposit's check url + + """ + r = self.do('get', check_url) + if r.ok: + data = r.json() + return data['status'] + + msg = 'Problem when checking deposit %s' % check_url + if log: + log.error(msg) + + raise ValueError(msg) + + +class BaseDepositClient(BaseApiDepositClient, metaclass=ABCMeta): + """Base Deposit client to access the public api. + + """ + def __init__(self, config, error_msg=None, empty_result={}): + super().__init__(config) + self.error_msg = error_msg + self.empty_result = empty_result + + @abstractmethod + def compute_url(self, *args, **kwargs): + """Compute api url endpoint to query.""" + pass + + @abstractmethod + def compute_method(self, *args, **kwargs): + """Http method to use on the url""" + pass + + @abstractmethod + def parse_result_ok(self, xml_content): + """Given an xml result from the api endpoint, parse it and returns a + dict. + + """ + pass + + def compute_information(self, *args, **kwargs): + """Compute some more information given the inputs (e.g http headers, + ...) + + """ + return {} + + def parse_result_error(self, xml_content): + """Given an error response in xml, parse it into a dict. 
+ + Returns: + dict with following keys: + + 'error': The error message + 'detail': Some more detail about the error if any + + """ + tree = etree.fromstring(xml_content.encode('utf-8')) + vals = tree.xpath('/x:error/y:summary', namespaces={ + 'x': 'http://purl.org/net/sword/', + 'y': 'http://www.w3.org/2005/Atom' + }) + summary = vals[0].text + if summary: + summary = summary.strip() + + vals = tree.xpath( + '/x:error/x:verboseDescription', + namespaces={'x': 'http://purl.org/net/sword/'}) + if vals: + detail = vals[0].text.strip() + else: + detail = None + + return {'error': summary, 'detail': detail} + + def do_execute(self, method, url, info): + """Execute the http query to url using method and info information. + + By default, execute a simple query to url with the http + method. Override this in daughter class to improve the + default behavior if needed. + + """ + return self.do(method, url) + + def execute(self, *args, **kwargs): + """Main endpoint to prepare and execute the http query to the api. + + """ + url = self.compute_url(*args, **kwargs) + method = self.compute_method(*args, **kwargs) + info = self.compute_information(*args, **kwargs) + + try: + r = self.do_execute(method, url, info) + except Exception as e: + msg = self.error_msg % (url, e) + r = self.empty_result + r.update({ + 'error': msg, + }) + return r + else: + if r.ok: + if int(r.status_code) == 204: # 204 returns no body + return {'status': r.status_code} + else: + return self.parse_result_ok(r.text) + else: + error = self.parse_result_error(r.text) + empty = self.empty_result + error.update(empty) + error.update({ + 'status': r.status_code, + }) + return error + + +class ServiceDocumentDepositClient(BaseDepositClient): + """Service Document information retrieval. 
+ + """ + def __init__(self, config): + super().__init__(config, + error_msg='Service document failure at %s: %s', + empty_result={'collection': None}) + + def compute_url(self, *args, **kwargs): + return '/servicedocument/' + + def compute_method(self, *args, **kwargs): + return 'get' + + def parse_result_ok(self, xml_content): + """Parse service document's success response. + + """ + tree = etree.fromstring(xml_content.encode('utf-8')) + collections = tree.xpath( + '/x:service/x:workspace/x:collection', + namespaces={'x': 'http://www.w3.org/2007/app'}) + items = dict(collections[0].items()) + collection = items['href'].rsplit(self.base_url)[1] + return { + 'collection': collection + } + + +class StatusDepositClient(BaseDepositClient): + """Status information on a deposit. + + """ + def __init__(self, config): + super().__init__(config, + error_msg='Status check failure at %s: %s', + empty_result={ + 'deposit_status': None, + 'deposit_status_detail': None, + 'deposit_swh_id': None, + }) + + def compute_url(self, collection, deposit_id): + return '/%s/%s/status/' % (collection, deposit_id) + + def compute_method(self, *args, **kwargs): + return 'get' + + def parse_result_ok(self, xml_content): + """Given an xml content as string, returns a deposit dict. 
+ + """ + tree = etree.fromstring(xml_content.encode('utf-8')) + vals = tree.xpath( + '/x:entry/x:deposit_id', + namespaces={'x': 'http://www.w3.org/2005/Atom'}) + deposit_id = vals[0].text + + vals = tree.xpath( + '/x:entry/x:deposit_status', + namespaces={'x': 'http://www.w3.org/2005/Atom'}) + deposit_status = vals[0].text + + vals = tree.xpath( + '/x:entry/x:deposit_status_detail', + namespaces={'x': 'http://www.w3.org/2005/Atom'}) + deposit_status_detail = vals[0].text + + vals = tree.xpath( + '/x:entry/x:deposit_swh_id', + namespaces={'x': 'http://www.w3.org/2005/Atom'}) + if vals: + deposit_swh_id = vals[0].text + else: + deposit_swh_id = None + + return { + 'deposit_id': deposit_id, + 'deposit_status': deposit_status, + 'deposit_status_detail': deposit_status_detail, + 'deposit_swh_id': deposit_swh_id, + } + + +class BaseCreateDepositClient(BaseDepositClient): + """Deposit client base class to post new deposit. + + """ + def __init__(self, config): + super().__init__(config, + error_msg='Post Deposit failure at %s: %s', + empty_result={ + 'deposit_id': None, + 'deposit_status': None, + }) + + def compute_url(self, collection, *args, **kwargs): + return '/%s/' % collection + + def compute_method(self, *args, **kwargs): + return 'post' + + def parse_result_ok(self, xml_content): + """Given an xml content as string, returns a deposit dict. 
+ + """ + tree = etree.fromstring(xml_content.encode('utf-8')) + vals = tree.xpath( + '/x:entry/x:deposit_id', + namespaces={'x': 'http://www.w3.org/2005/Atom'}) + deposit_id = vals[0].text + + vals = tree.xpath( + '/x:entry/x:deposit_status', + namespaces={'x': 'http://www.w3.org/2005/Atom'}) + deposit_status = vals[0].text + + vals = tree.xpath( + '/x:entry/x:deposit_date', + namespaces={'x': 'http://www.w3.org/2005/Atom'}) + deposit_date = vals[0].text + + return { + 'deposit_id': deposit_id, + 'deposit_status': deposit_status, + 'deposit_date': deposit_date, + } + + def _compute_information(self, collection, filepath, in_progress, slug, + is_archive=True): + """Given a filepath, compute necessary information on that file. + + Args: + filepath (str): Path to a file + is_archive (bool): is it an archive or not? + + Returns: + dict with keys: + 'content-type': content type associated + 'md5sum': md5 sum + 'filename': filename + """ + filename = os.path.basename(filepath) + + if is_archive: + md5sum = hashlib.md5(open(filepath, 'rb').read()).hexdigest() + extension = filename.split('.')[-1] + if 'zip' in extension: + content_type = 'application/zip' + else: + content_type = 'application/x-tar' + else: + content_type = None + md5sum = None + + return { + 'slug': slug, + 'in_progress': in_progress, + 'content-type': content_type, + 'md5sum': md5sum, + 'filename': filename, + 'filepath': filepath, + } + + def compute_information(self, collection, filepath, in_progress, slug, + is_archive=True, **kwargs): + info = self._compute_information(collection, filepath, in_progress, + slug, is_archive=is_archive) + info['headers'] = self.compute_headers(info) + return info + + def do_execute(self, method, url, info): + with open(info['filepath'], 'rb') as f: + return self.do(method, url, data=f, headers=info['headers']) + + +class CreateArchiveDepositClient(BaseCreateDepositClient): + """Post an archive (binary) deposit client.""" + def compute_headers(self, info): + return { + 
'SLUG': info['slug'], + 'CONTENT_MD5': info['md5sum'], + 'IN-PROGRESS': str(info['in_progress']), + 'CONTENT-TYPE': info['content-type'], + 'CONTENT-DISPOSITION': 'attachment; filename=%s' % ( + info['filename'], ), + } + + +class UpdateArchiveDepositClient(CreateArchiveDepositClient): + """Update (add/replace) an archive (binary) deposit client.""" + def compute_url(self, collection, *args, deposit_id=None, **kwargs): + return '/%s/%s/media/' % (collection, deposit_id) + + def compute_method(self, *args, replace=False, **kwargs): + return 'put' if replace else 'post' + + +class CreateMetadataDepositClient(BaseCreateDepositClient): + """Post a metadata deposit client.""" + def compute_headers(self, info): + return { + 'SLUG': info['slug'], + 'IN-PROGRESS': str(info['in_progress']), + 'CONTENT-TYPE': 'application/atom+xml;type=entry', + } + + +class UpdateMetadataDepositClient(CreateMetadataDepositClient): + """Update (add/replace) a metadata deposit client.""" + def compute_url(self, collection, *args, deposit_id=None, **kwargs): + return '/%s/%s/metadata/' % (collection, deposit_id) + + def compute_method(self, *args, replace=False, **kwargs): + return 'put' if replace else 'post' + + +class CreateMultipartDepositClient(BaseCreateDepositClient): + """Create a multipart deposit client.""" + def _multipart_info(self, info, info_meta): + files = [ + ('file', + (info['filename'], + open(info['filepath'], 'rb'), + info['content-type'])), + ('atom', + (info_meta['filename'], + open(info_meta['filepath'], 'rb'), + 'application/atom+xml')), + ] + + headers = { + 'SLUG': info['slug'], + 'CONTENT_MD5': info['md5sum'], + 'IN-PROGRESS': str(info['in_progress']), + } + + return files, headers + + def compute_information(self, collection, archive_path, metadata_path, + in_progress, slug, **kwargs): + info = self._compute_information( + collection, archive_path, in_progress, slug) + info_meta = self._compute_information( + collection, metadata_path, in_progress, slug, 
is_archive=False) + files, headers = self._multipart_info(info, info_meta) + return {'files': files, 'headers': headers} + + def do_execute(self, method, url, info): + return self.do( + method, url, files=info['files'], headers=info['headers']) + + +class UpdateMultipartDepositClient(CreateMultipartDepositClient): + """Update a multipart deposit client.""" + def compute_url(self, collection, *args, deposit_id=None, **kwargs): + return '/%s/%s/metadata/' % (collection, deposit_id) + + def compute_method(self, *args, replace=False, **kwargs): + return 'put' if replace else 'post' + + +class PublicApiDepositClient(BaseApiDepositClient): + """Public api deposit client.""" + def service_document(self, log=None): + """Retrieve service document endpoint's information.""" + return ServiceDocumentDepositClient(self.config).execute() + + def deposit_status(self, collection, deposit_id, log=None): + """Retrieve status information on a deposit.""" + return StatusDepositClient(self.config).execute( + collection, deposit_id) + + def deposit_create(self, collection, slug, archive_path=None, + metadata_path=None, in_progress=False, log=None): + """Create a new deposit (archive, metadata, both as multipart).""" + if archive_path and not metadata_path: + return CreateArchiveDepositClient(self.config).execute( + collection, archive_path, in_progress, slug) + elif not archive_path and metadata_path: + return CreateMetadataDepositClient(self.config).execute( + collection, metadata_path, in_progress, slug, + is_archive=False) + else: + return CreateMultipartDepositClient(self.config).execute( + collection, archive_path, metadata_path, in_progress, + slug) + + def deposit_update(self, collection, deposit_id, slug, archive_path=None, + metadata_path=None, in_progress=False, + replace=False, log=None): + """Update (add/replace) existing deposit (archive, metadata, both).""" + r = self.deposit_status(collection, deposit_id, log=log) + if 'error' in r: + return r + + status = 
r['deposit_status'] + if status != 'partial': + return { + 'error': "You can only act on deposit with status 'partial'", + 'detail': "The deposit %s has status '%s'" % ( + deposit_id, status), + 'deposit_status': status, + 'deposit_id': deposit_id, + } + if archive_path and not metadata_path: + r = UpdateArchiveDepositClient(self.config).execute( + collection, archive_path, in_progress, slug, + deposit_id=deposit_id, replace=replace, log=log) + elif not archive_path and metadata_path: + r = UpdateMetadataDepositClient(self.config).execute( + collection, metadata_path, in_progress, slug, + deposit_id=deposit_id, replace=replace, log=log) + else: + r = UpdateMultipartDepositClient(self.config).execute( + collection, archive_path, metadata_path, in_progress, + slug, deposit_id=deposit_id, replace=replace, log=log) + + if 'error' in r: + return r + return self.deposit_status(collection, deposit_id, log=log) diff --git a/swh/deposit/client/cli.py b/swh/deposit/client/cli.py new file mode 100755 index 00000000..91ef9820 --- /dev/null +++ b/swh/deposit/client/cli.py @@ -0,0 +1,294 @@ +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +"""Script to demonstrate software deposit scenario to +https://deposit.softwareheritage.org. + +Use: python3 -m swh.deposit.client.cli --help + +Documentation: https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html # noqa + +""" + +import os +import click +import logging +import uuid + + +from . import PublicApiDepositClient + + +class InputError(ValueError): + """Input script error + + """ + pass + + +def generate_slug(prefix='swh-sample'): + """Generate a slug (sample purposes).
+ + """ + return '%s-%s' % (prefix, uuid.uuid4()) + + +def parse_cli_options(username, password, archive, metadata, + archive_deposit, metadata_deposit, + collection, slug, partial, deposit_id, replace, + url, status): + """Parse the cli options and make sure the combination is acceptable*. + If not, an InputError exception is raised explaining the issue. + + By acceptable, we mean: + + - A multipart deposit (create or update) needs both an + existing software archive and an existing metadata file + + - A binary deposit (create/update) needs an existing + software archive + + - A metadata deposit (create/update) needs an existing + metadata file + + - A deposit update needs a deposit_id to be provided + + This won't prevent all failure cases though. The remaining + errors are already dealt with by the underlying api client. + + Raises: + InputError explaining the issue + + Returns: + dict with the following keys: + + 'archive': the software archive to deposit + 'username': username + 'password': associated password + 'metadata': the metadata file to deposit + 'collection': the username's associated collection + 'slug': the slug or external id identifying the deposit to make + 'partial': if the deposit is partial or not + 'client': instantiated class + 'url': deposit's server main entry point + 'replace': if the update replaces existing data or not + 'deposit_id': optional deposit identifier + + """ + if status and not deposit_id: + raise InputError("Deposit id must be provided for status check") + + if status and deposit_id: # status is higher priority over deposit + archive_deposit = False + metadata_deposit = False + + if archive_deposit and metadata_deposit: + # too many flags use, remove redundant ones (-> multipart deposit) + archive_deposit = False + metadata_deposit = False + + if archive_deposit and archive and not os.path.exists(archive): + raise InputError('Software Archive %s must exist!' 
% archive) + + if archive and not metadata: + metadata = '%s.metadata.xml' % archive + + if metadata_deposit: + archive = None + + if archive_deposit: + metadata = None + + if metadata_deposit and not metadata: + raise InputError( + "Metadata deposit filepath must be provided for metadata deposit") + + if not status and metadata_deposit and not os.path.exists(metadata): + raise InputError('Software Archive metadata %s must exist!' % metadata) + + if not status and not archive and not metadata: + raise InputError( + 'Please provide an actionable command. See --help for more ' + 'information.') + + if replace and not deposit_id: + raise InputError( + 'To update an existing deposit, you must provide its id') + + client = PublicApiDepositClient({ + 'url': url, + 'auth': { + 'username': username, + 'password': password + }, + }) + + if not collection: + # retrieve user's collection + sd_content = client.service_document() + if 'error' in sd_content: + raise InputError('Service document retrieval: %s' % ( + sd_content['error'], )) + collection = sd_content['collection'].replace('/', '') + + if not slug: + # generate slug + slug = generate_slug() + + return { + 'archive': archive, + 'username': username, + 'password': password, + 'metadata': metadata, + 'collection': collection, + 'slug': slug, + 'partial': partial, + 'client': client, + 'url': url, + 'deposit_id': deposit_id, + 'replace': replace, + } + + +def deposit_status(config, dry_run, log): + log.debug('Status deposit') + client = config['client'] + collection = config['collection'] + deposit_id = config['deposit_id'] + if not dry_run: + r = client.deposit_status(collection, deposit_id, log) + return r + return {} + + +def deposit_create(config, dry_run, log): + """Delegate the actual deposit to the deposit client. 
+ + """ + log.debug('Create deposit') + + client = config['client'] + collection = config['collection'] + archive_path = config['archive'] + metadata_path = config['metadata'] + slug = config['slug'] + in_progress = config['partial'] + if not dry_run: + r = client.deposit_create(collection, slug, archive_path, + metadata_path, in_progress, log) + return r + return {} + + +def deposit_update(config, dry_run, log): + """Delegate the actual deposit to the deposit client. + + """ + log.debug('Update deposit') + + client = config['client'] + collection = config['collection'] + deposit_id = config['deposit_id'] + archive_path = config['archive'] + metadata_path = config['metadata'] + slug = config['slug'] + in_progress = config['partial'] + replace = config['replace'] + if not dry_run: + r = client.deposit_update(collection, deposit_id, slug, archive_path, + metadata_path, in_progress, replace, log) + return r + return {} + + +@click.command() +@click.option('--username', required=1, + help="(Mandatory) User's name") +@click.option('--password', required=1, + help="(Mandatory) User's associated password") +@click.option('--archive', + help='(Optional) Software archive to deposit') +@click.option('--metadata', + help="(Optional) Path to xml metadata file. If not provided, this will use a file named .metadata.xml") # noqa +@click.option('--archive-deposit/--no-archive-deposit', default=False, + help='(Optional) Software archive only deposit') +@click.option('--metadata-deposit/--no-metadata-deposit', default=False, + help='(Optional) Metadata only deposit') +@click.option('--collection', + help="(Optional) User's collection. If not provided, this will be fetched.") # noqa +@click.option('--slug', + help="""(Optional) External system information identifier. 
If not provided, it will be generated""") # noqa +@click.option('--partial/--no-partial', default=False, + help='(Optional) The deposit will be partial, other deposits will have to take place to finalize it.') # noqa +@click.option('--deposit-id', default=None, + help='(Optional) Update an existing partial deposit with its identifier') # noqa +@click.option('--replace/--no-replace', default=False, + help='(Optional) Update by replacing existing metadata to a deposit') # noqa +@click.option('--url', default='http://deposit.softwareheritage.org/1', + help="(Optional) Deposit server api endpoint. By default, https://deposit.softwareheritage.org/1") # noqa +@click.option('--status/--no-status', default=False, + help="(Optional) Deposit's status") +@click.option('--dry-run/--no-dry-run', default=False, + help='(Optional) No-op deposit') +@click.option('--verbose/--no-verbose', default=False, + help='Verbose mode') +def main(username, password, archive=None, metadata=None, + archive_deposit=False, metadata_deposit=False, + collection=None, slug=None, partial=False, deposit_id=None, + replace=False, status=False, + url='https://deposit.softwareheritage.org/1', dry_run=True, + verbose=False): + """Software Heritage Deposit client - Create (or update partial) +deposit through the command line. + +More documentation can be found at +https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html. 
+ + """ + + log = logging.getLogger('swh-deposit') + log.addHandler(logging.StreamHandler()) + _loglevel = logging.DEBUG if verbose else logging.INFO + log.setLevel(_loglevel) + + if dry_run: + log.info("**DRY RUN**") + + config = {} + + try: + log.debug('Parsing cli options') + config = parse_cli_options( + username, password, archive, metadata, archive_deposit, + metadata_deposit, collection, slug, partial, deposit_id, + replace, url, status) + + except InputError as e: + msg = 'Problem during parsing options: %s' % e + r = { + 'error': msg, + } + log.info(r) + return 1 + + if verbose: + log.info("Parsed configuration: %s" % ( + config, )) + + deposit_id = config['deposit_id'] + + if status and deposit_id: + r = deposit_status(config, dry_run, log) + elif not status and deposit_id: + r = deposit_update(config, dry_run, log) + elif not status and not deposit_id: + r = deposit_create(config, dry_run, log) + + log.info(r) + + +if __name__ == '__main__': + main() diff --git a/swh/deposit/loader/checker.py b/swh/deposit/loader/checker.py index 34ffd018..cb21ee81 100644 --- a/swh/deposit/loader/checker.py +++ b/swh/deposit/loader/checker.py @@ -1,20 +1,20 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from .client import DepositClient +from ..client import ApiDepositClient class DepositChecker(): """Deposit checker implementation. Trigger deposit's checks through the private api. 
""" def __init__(self, client=None): super().__init__() - self.client = client if client else DepositClient() + self.client = client if client else ApiDepositClient() def check(self, deposit_check_url): return self.client.check(deposit_check_url) diff --git a/swh/deposit/loader/client.py b/swh/deposit/loader/client.py deleted file mode 100644 index 4830a0ef..00000000 --- a/swh/deposit/loader/client.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright (C) 2017 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -"""Module in charge of defining a swh-deposit client - -""" - -import requests -from swh.core.config import SWHConfig - - -class DepositClient(SWHConfig): - """Deposit client to: - - - read a given deposit's archive(s) - - read a given deposit's metadata - - update a given deposit's status - - """ - CONFIG_BASE_FILENAME = 'deposit/client' - DEFAULT_CONFIG = { - 'url': ('str', 'http://localhost:5006'), - 'auth': ('dict', {}) # with optional 'username'/'password' keys - } - - def __init__(self, config=None, _client=requests): - super().__init__() - if config is None: - self.config = super().parse_config_file() - else: - self.config = config - - self._client = _client - self.base_url = self.config['url'] - auth = self.config['auth'] - if auth == {}: - self.auth = None - else: - self.auth = (auth['username'], auth['password']) - - def do(self, method, url, *args, **kwargs): - """Internal method to deal with requests, possibly with basic http - authentication. 
- - Args: - method (str): supported http methods as in self._methods' keys - - Returns: - The request's execution - - """ - if hasattr(self._client, method): - method_fn = getattr(self._client, method) - else: - raise ValueError('Development error, unsupported method %s' % ( - method)) - - if self.auth: - kwargs['auth'] = self.auth - - full_url = '%s%s' % (self.base_url.rstrip('/'), url) - return method_fn(full_url, *args, **kwargs) - - def archive_get(self, archive_update_url, archive_path, log=None): - """Retrieve the archive from the deposit to a local directory. - - Args: - archive_update_url (str): The full deposit archive(s)'s raw content - to retrieve locally - - archive_path (str): the local archive's path where to store - the raw content - - Returns: - The archive path to the local archive to load. - Or None if any problem arose. - - """ - r = self.do('get', archive_update_url, stream=True) - if r.ok: - with open(archive_path, 'wb') as f: - for chunk in r.iter_content(): - f.write(chunk) - - return archive_path - - msg = 'Problem when retrieving deposit archive at %s' % ( - archive_update_url, ) - if log: - log.error(msg) - - raise ValueError(msg) - - def metadata_get(self, metadata_url, log=None): - """Retrieve the metadata information on a given deposit. - - Args: - metadata_url (str): The full deposit metadata url to retrieve - locally - - Returns: - The dictionary of metadata for that deposit or None if any - problem arose. - - """ - r = self.do('get', metadata_url) - if r.ok: - return r.json() - - msg = 'Problem when retrieving metadata at %s' % metadata_url - if log: - log.error(msg) - - raise ValueError(msg) - - def status_update(self, update_status_url, status, - revision_id=None): - """Update the deposit's status. 
- - Args: - update_status_url (str): the full deposit's archive - status (str): The status to update the deposit with - revision_id (str/None): the revision's identifier to update to - - """ - payload = {'status': status} - if revision_id: - payload['revision_id'] = revision_id - - self.do('put', update_status_url, json=payload) - - def check(self, check_url, log=None): - """Check the deposit's associated data (metadata, archive(s)) - - Args: - check_url (str): the full deposit's check url - - """ - r = self.do('get', check_url) - if r.ok: - data = r.json() - return data['status'] - - msg = 'Problem when checking deposit %s' % check_url - if log: - log.error(msg) - - raise ValueError(msg) diff --git a/swh/deposit/loader/loader.py b/swh/deposit/loader/loader.py index 316ab5ee..22315d15 100644 --- a/swh/deposit/loader/loader.py +++ b/swh/deposit/loader/loader.py @@ -1,129 +1,129 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import os import tempfile from swh.model import hashutil from swh.loader.tar import loader from swh.loader.core.loader import SWHLoader -from .client import DepositClient +from ..client import ApiDepositClient class DepositLoader(loader.TarLoader): """Deposit loader implementation. This is a subclass of the :class:TarLoader as the main goal of this class is to first retrieve the deposit's tarball contents as one and its associated metadata. Then provide said tarball to be loaded by the TarLoader. 
This will: - retrieves the deposit's archive locally - provide the archive to be loaded by the tar loader - clean up the temporary location used to retrieve the archive locally - update the deposit's status accordingly """ CONFIG_BASE_FILENAME = 'loader/deposit' ADDITIONAL_CONFIG = { 'extraction_dir': ('str', '/tmp/swh.deposit.loader/'), } def __init__(self, client=None): super().__init__( logging_class='swh.deposit.loader.loader.DepositLoader') - self.client = client if client else DepositClient() + self.client = client if client else ApiDepositClient() def load(self, *, archive_url, deposit_meta_url, deposit_update_url): SWHLoader.load( self, archive_url=archive_url, deposit_meta_url=deposit_meta_url, deposit_update_url=deposit_update_url) def prepare(self, *, archive_url, deposit_meta_url, deposit_update_url): """Prepare the loading by first retrieving the deposit's raw archive content. """ self.deposit_update_url = deposit_update_url self.client.status_update(deposit_update_url, 'loading') temporary_directory = tempfile.TemporaryDirectory() self.temporary_directory = temporary_directory archive_path = os.path.join(temporary_directory.name, 'archive.zip') archive = self.client.archive_get( archive_url, archive_path, log=self.log) metadata = self.client.metadata_get( deposit_meta_url, log=self.log) origin = metadata['origin'] visit_date = datetime.datetime.now(tz=datetime.timezone.utc) revision = metadata['revision'] occurrence = metadata['occurrence'] self.origin_metadata = metadata['origin_metadata'] self.prepare_metadata() super().prepare(tar_path=archive, origin=origin, visit_date=visit_date, revision=revision, occurrences=[occurrence]) def store_metadata(self): """Storing the origin_metadata during the load processus. Provider_id and tool_id are resolved during the prepare() method. 
""" origin_id = self.origin_id visit_date = self.visit_date provider_id = self.origin_metadata['provider']['provider_id'] tool_id = self.origin_metadata['tool']['tool_id'] metadata = self.origin_metadata['metadata'] try: self.send_origin_metadata(origin_id, visit_date, provider_id, tool_id, metadata) - except: + except Exception: self.log.exception('Problem when storing origin_metadata') raise def post_load(self, success=True): """Updating the deposit's status according to its loading status. If not successful, we update its status to 'failed'. Otherwise, we update its status to 'done' and pass along its associated revision. """ try: if not success: self.client.status_update(self.deposit_update_url, status='failed') return # first retrieve the new revision [rev_id] = self.objects['revision'].keys() if rev_id: rev_id_hex = hashutil.hash_to_hex(rev_id) # then update the deposit's status to success with its # revision-id self.client.status_update(self.deposit_update_url, status='done', revision_id=rev_id_hex) - except: + except Exception: self.log.exception( 'Problem when trying to update the deposit\'s status') def cleanup(self): """Clean up temporary directory where we retrieved the tarball. 
""" super().cleanup() self.temporary_directory.cleanup() diff --git a/swh/deposit/templates/deposit/service_document.xml b/swh/deposit/templates/deposit/service_document.xml index aec0d822..7d60eded 100644 --- a/swh/deposit/templates/deposit/service_document.xml +++ b/swh/deposit/templates/deposit/service_document.xml @@ -1,24 +1,24 @@ 2.0 {{ max_upload_size }} The Software Heritage (SWH) Archive - {% for collection in collections %} - {{ collection.name }} Software Collection + {% for col_name, col_uri in collections.items %} + {{ col_name }} Software Collection {% for accept_content_type in accept_content_types %}{{ accept_content_type }} {% endfor %}Collection Policy Software Heritage Archive Collect, Preserve, Share false false {% for accept_packaging in accept_packagings %}{{ accept_packaging }} - {% endfor %}https://deposit.softwareheritage.org/1/{{ collection.name }}/ + {% endfor %}{{ col_uri }} {% endfor %} diff --git a/swh/deposit/tests/api/test_service_document.py b/swh/deposit/tests/api/test_service_document.py index 453d16a1..6d57ed39 100644 --- a/swh/deposit/tests/api/test_service_document.py +++ b/swh/deposit/tests/api/test_service_document.py @@ -1,102 +1,102 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.urlresolvers import reverse from nose.tools import istest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.tests import TEST_CONFIG from swh.deposit.config import SD_IRI from ..common import BasicTestCase, WithAuthTestCase class ServiceDocumentNoAuthCase(APITestCase, BasicTestCase): """Service document endpoints are protected with basic authentication. 
""" @istest def service_document_no_authentication_fails(self): """Without authentication, service document endpoint should return 401 """ url = reverse(SD_IRI) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) @istest def service_document_with_http_accept_should_not_break(self): """Without auth, sd endpoint through browser should return 401 """ url = reverse(SD_IRI) # when response = self.client.get( url, HTTP_ACCEPT='text/html,application/xml;q=9,*/*,q=8') self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) class ServiceDocumentCase(APITestCase, WithAuthTestCase, BasicTestCase): def assertResponseOk(self, response): self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEquals(response.content.decode('utf-8'), ''' 2.0 %s The Software Heritage (SWH) Archive - + %s Software Collection application/zip application/x-tar Collection Policy Software Heritage Archive Collect, Preserve, Share false false http://purl.org/net/sword/package/SimpleZip - https://deposit.softwareheritage.org/1/%s/ + http://testserver/1/%s/ ''' % (TEST_CONFIG['max_upload_size'], self.username, self.username, self.username)) # noqa @istest def service_document(self): """With authentication, service document list user's collection """ url = reverse(SD_IRI) # when response = self.client.get(url) # then self.assertResponseOk(response) @istest def service_document_with_http_accept_header(self): """With authentication, with browser, sd list user's collection """ url = reverse(SD_IRI) # when response = self.client.get( url, HTTP_ACCEPT='text/html,application/xml;q=9,*/*,q=8') self.assertResponseOk(response) diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py index 31ac3181..fae9364d 100644 --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -1,466 +1,466 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this 
distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 import hashlib import os import shutil import tempfile from django.core.urlresolvers import reverse from django.test import TestCase from io import BytesIO from nose.plugins.attrib import attr from rest_framework import status from swh.deposit.config import COL_IRI, EM_IRI, EDIT_SE_IRI from swh.deposit.models import DepositClient, DepositCollection, Deposit from swh.deposit.models import DepositRequest from swh.deposit.models import DepositRequestType from swh.deposit.parsers import parse_xml from swh.deposit.settings.testing import MEDIA_ROOT from swh.core import tarball def create_arborescence_zip(root_path, archive_name, filename, content, up_to_size=None): """Build an archive named archive_name in the root_path. This archive contains one file named filename with the content content. Returns: dict with the keys: - dir: the directory of that archive - path: full path to the archive - sha1sum: archive's sha1sum - length: archive's length """ os.makedirs(root_path, exist_ok=True) archive_path_dir = tempfile.mkdtemp(dir=root_path) dir_path = os.path.join(archive_path_dir, archive_name) os.mkdir(dir_path) filepath = os.path.join(dir_path, filename) - l = len(content) + _length = len(content) count = 0 batch_size = 128 with open(filepath, 'wb') as f: f.write(content) if up_to_size: # fill with blank content up to a given size - count += l + count += _length while count < up_to_size: f.write(b'0'*batch_size) count += batch_size zip_path = dir_path + '.zip' zip_path = tarball.compress(zip_path, 'zip', dir_path) with open(zip_path, 'rb') as f: length = 0 sha1sum = hashlib.sha1() md5sum = hashlib.md5() data = b'' for chunk in f: sha1sum.update(chunk) md5sum.update(chunk) length += len(chunk) data += chunk return { 'dir': archive_path_dir, 'name': archive_name, 'data': data, 'path': zip_path, 'sha1sum': sha1sum.hexdigest(), 
'md5sum': md5sum.hexdigest(), 'length': length, } @attr('fs') class FileSystemCreationRoutine(TestCase): """Mixin intended for tests needed to tamper with archives. """ def setUp(self): """Define the test client and other test variables.""" super().setUp() self.root_path = '/tmp/swh-deposit/test/build-zip/' os.makedirs(self.root_path, exist_ok=True) self.archive = create_arborescence_zip( self.root_path, 'archive1', 'file1', b'some content in file') self.atom_entry = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author https://hal-test.archives-ouvertes.fr """ def tearDown(self): super().tearDown() shutil.rmtree(self.root_path) def create_simple_binary_deposit(self, status_partial=True): response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/zip', data=self.archive['data'], CONTENT_LENGTH=self.archive['length'], HTTP_MD5SUM=self.archive['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive['name'], )) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content[ '{http://www.w3.org/2005/Atom}deposit_id'] return deposit_id def create_complex_binary_deposit(self, status_partial=False): deposit_id = self.create_simple_binary_deposit( status_partial=True) # Add a second archive to the deposit # update its status to DEPOSIT_STATUS_VERIFIED response = self.client.post( reverse(EM_IRI, args=[self.collection.name, deposit_id]), content_type='application/zip', data=self.archive2['data'], CONTENT_LENGTH=self.archive2['length'], HTTP_MD5SUM=self.archive2['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_DISPOSITION='attachment; filename=filename1.zip') # then assert response.status_code == status.HTTP_201_CREATED response_content = 
parse_xml(BytesIO(response.content)) deposit_id = response_content[ '{http://www.w3.org/2005/Atom}deposit_id'] return deposit_id def update_binary_deposit(self, deposit_id, status_partial=False): # update existing deposit with atom entry metadata response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.codemeta_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) # then # assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content[ '{http://www.w3.org/2005/Atom}deposit_id'] return deposit_id @attr('fs') class BasicTestCase(TestCase): """Mixin intended for data setup purposes (user, collection, etc...) """ def setUp(self): """Define the test client and other test variables.""" super().setUp() # expanding diffs in tests self.maxDiff = None # basic minimum test data deposit_request_types = {} # Add deposit request types for deposit_request_type in ['archive', 'metadata']: drt = DepositRequestType(name=deposit_request_type) drt.save() deposit_request_types[deposit_request_type] = drt _name = 'hal' _provider_url = 'https://hal-test.archives-ouvertes.fr/' _domain = 'archives-ouvertes.fr/' # set collection up _collection = DepositCollection(name=_name) _collection.save() # set user/client up _client = DepositClient.objects.create_user(username=_name, password=_name, provider_url=_provider_url, domain=_domain) _client.collections = [_collection.id] _client.save() self.collection = _collection self.user = _client self.username = _name self.userpass = _name self.deposit_request_types = deposit_request_types def tearDown(self): super().tearDown() # Clean up uploaded files in temporary directory (tests have # their own media root folder) if os.path.exists(MEDIA_ROOT): for d in os.listdir(MEDIA_ROOT): shutil.rmtree(os.path.join(MEDIA_ROOT, d)) class WithAuthTestCase(TestCase): """Mixin 
intended for testing the api with basic authentication. """ def setUp(self): super().setUp() _token = '%s:%s' % (self.username, self.userpass) token = base64.b64encode(_token.encode('utf-8')) authorization = 'Basic %s' % token.decode('utf-8') self.client.credentials(HTTP_AUTHORIZATION=authorization) def tearDown(self): super().tearDown() self.client.credentials() class CommonCreationRoutine(TestCase): """Mixin class to share initialization routine. cf: `class`:test_deposit_update.DepositReplaceExistingDataTest `class`:test_deposit_update.DepositUpdateDepositWithNewDataTest `class`:test_deposit_update.DepositUpdateFailuresTest `class`:test_deposit_delete.DepositDeleteTest """ def setUp(self): super().setUp() self.atom_entry_data0 = b""" some-external-id https://hal-test.archives-ouvertes.fr/some-external-id """ self.atom_entry_data1 = b""" anotherthing https://hal-test.archives-ouvertes.fr/anotherthing """ self.atom_entry_data2 = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author https://hal-test.archives-ouvertes.fr/id """ self.codemeta_entry_data0 = b""" Awesome Compiler https://hal-test.archives-ouvertes.fr/1785io25c695 urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author description key-word 1 """ self.codemeta_entry_data1 = b""" Composing a Web of Audio Applications hal hal-01243065 hal-01243065 https://hal-test.archives-ouvertes.fr/hal-01243065 test DSP programming,Web 2017-05-03T16:08:47+02:00 this is the description 1 phpstorm stable php python C GNU General Public License v3.0 only CeCILL Free Software License Agreement v1.1 HAL hal@ccsd.cnrs.fr Morane Gruenpeter """ def create_invalid_deposit(self, external_id='some-external-id-1'): url = reverse(COL_IRI, args=[self.collection.name]) data = b'some data which is clearly not a zip file' md5sum = hashlib.md5(data).hexdigest() # when response = self.client.post( url, 
content_type='application/zip', # as zip data=data, # + headers CONTENT_LENGTH=len(data), # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, HTTP_CONTENT_MD5=md5sum, HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content[ '{http://www.w3.org/2005/Atom}deposit_id'] return deposit_id def create_deposit_with_status( self, status, external_id='some-external-id-1', swh_id=None): deposit_id = self.create_invalid_deposit(external_id) # We cannot create some form of deposit with a given status in # test context ('rejected' for example). As flipped off the # checks in the configuration so all deposits have the status # deposited). Update in place the deposit with such # status deposit = Deposit.objects.get(pk=deposit_id) deposit.status = status if swh_id: deposit.swh_id = swh_id deposit.save() return deposit_id def create_simple_deposit_partial(self, external_id='some-external-id'): """Create a simple deposit (1 request) in `partial` state and returns its new identifier. Returns: deposit id """ response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data0, HTTP_SLUG=external_id, HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content[ '{http://www.w3.org/2005/Atom}deposit_id'] return deposit_id def create_deposit_partial_with_data_in_args(self, data): """Create a simple deposit (1 request) in `partial` state with the data or metadata as an argument and returns its new identifier. 
Args: data: atom entry Returns: deposit id """ response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=data, HTTP_SLUG='external-id', HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content[ '{http://www.w3.org/2005/Atom}deposit_id'] return deposit_id def _update_deposit_with_status(self, deposit_id, status_partial=False): """Add to a given deposit another archive and update its current status to `deposited` (by default). Returns: deposit id """ # when response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) # then assert response.status_code == status.HTTP_201_CREATED return deposit_id def create_deposit_ready(self, external_id='some-external-id'): """Create a complex deposit (2 requests) in status `deposited`. """ deposit_id = self.create_simple_deposit_partial( external_id=external_id) deposit_id = self._update_deposit_with_status(deposit_id) return deposit_id def create_deposit_partial(self, external_id='some-external-id'): """Create a complex deposit (2 requests) in status `partial`. """ deposit_id = self.create_simple_deposit_partial( external_id=external_id) deposit_id = self._update_deposit_with_status( deposit_id, status_partial=True) return deposit_id def add_metadata_to_deposit(self, deposit_id, status_partial=False): """Add metadata to deposit. 
""" # when response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.codemeta_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) assert response.status_code == status.HTTP_201_CREATED # then deposit = Deposit.objects.get(pk=deposit_id) assert deposit is not None deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert deposit_requests is not [] for dr in deposit_requests: if dr.type.name == 'metadata': assert deposit_requests[0].metadata is not {} return deposit_id diff --git a/swh/deposit/tests/loader/common.py b/swh/deposit/tests/loader/common.py index a1103943..530a1fe3 100644 --- a/swh/deposit/tests/loader/common.py +++ b/swh/deposit/tests/loader/common.py @@ -1,49 +1,49 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json -from swh.deposit.loader.client import DepositClient +from swh.deposit.client import ApiDepositClient CLIENT_TEST_CONFIG = { 'url': 'http://nowhere:9000/', 'auth': {}, # no authentication in test scenario } -class SWHDepositTestClient(DepositClient): +class SWHDepositTestClient(ApiDepositClient): """Deposit test client to permit overriding the default request client. 
""" def __init__(self, client, config): super().__init__(config=config) self.client = client def archive_get(self, archive_update_url, archive_path, log=None): r = self.client.get(archive_update_url) with open(archive_path, 'wb') as f: for chunk in r.streaming_content: f.write(chunk) return archive_path def metadata_get(self, metadata_url, log=None): r = self.client.get(metadata_url) return json.loads(r.content.decode('utf-8')) def status_update(self, update_status_url, status, revision_id=None): payload = {'status': status} if revision_id: payload['revision_id'] = revision_id self.client.put(update_status_url, content_type='application/json', data=json.dumps(payload)) def check(self, check_url): r = self.client.get(check_url) data = json.loads(r.content.decode('utf-8')) return data['status'] diff --git a/swh/deposit/tests/loader/test_client.py b/swh/deposit/tests/loader/test_client.py index c4ec4963..c49b21d5 100644 --- a/swh/deposit/tests/loader/test_client.py +++ b/swh/deposit/tests/loader/test_client.py @@ -1,268 +1,268 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import shutil import tempfile import unittest from nose.plugins.attrib import attr from nose.tools import istest -from swh.deposit.loader.client import DepositClient +from swh.deposit.client import ApiDepositClient from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS from swh.deposit.config import DEPOSIT_STATUS_LOAD_FAILURE from .common import CLIENT_TEST_CONFIG class StreamedResponse: """Streamed response facsimile """ def __init__(self, ok, stream): self.ok = ok self.stream = stream def iter_content(self): yield from self.stream class FakeRequestClientGet: """Fake request client dedicated to get method calls. 
""" def __init__(self, response): self.response = response def get(self, *args, **kwargs): self.args = args self.kwargs = kwargs return self.response @attr('fs') -class DepositClientReadArchiveTest(unittest.TestCase): +class ApiDepositClientReadArchiveTest(unittest.TestCase): def setUp(self): super().setUp() self.temporary_directory = tempfile.mkdtemp(dir='/tmp') def tearDown(self): super().setUp() shutil.rmtree(self.temporary_directory) @istest def archive_get(self): """Reading archive should write data in temporary directory """ stream_content = [b"some", b"streamed", b"response"] response = StreamedResponse( ok=True, stream=(s for s in stream_content)) _client = FakeRequestClientGet(response) - deposit_client = DepositClient(config=CLIENT_TEST_CONFIG, - _client=_client) + deposit_client = ApiDepositClient(config=CLIENT_TEST_CONFIG, + _client=_client) archive_path = os.path.join(self.temporary_directory, 'test.archive') archive_path = deposit_client.archive_get('/some/url', archive_path) self.assertTrue(os.path.exists(archive_path)) with open(archive_path, 'rb') as f: actual_content = f.read() self.assertEquals(actual_content, b''.join(stream_content)) self.assertEquals(_client.args, ('http://nowhere:9000/some/url', )) self.assertEquals(_client.kwargs, { 'stream': True }) @istest def archive_get_with_authentication(self): """Reading archive should write data in temporary directory """ stream_content = [b"some", b"streamed", b"response", b"for", b"auth"] response = StreamedResponse( ok=True, stream=(s for s in stream_content)) _client = FakeRequestClientGet(response) _config = CLIENT_TEST_CONFIG.copy() _config['auth'] = { # add authentication setup 'username': 'user', 'password': 'pass' } - deposit_client = DepositClient(_config, _client=_client) + deposit_client = ApiDepositClient(_config, _client=_client) archive_path = os.path.join(self.temporary_directory, 'test.archive') archive_path = deposit_client.archive_get('/some/url', archive_path) 
self.assertTrue(os.path.exists(archive_path)) with open(archive_path, 'rb') as f: actual_content = f.read() self.assertEquals(actual_content, b''.join(stream_content)) self.assertEquals(_client.args, ('http://nowhere:9000/some/url', )) self.assertEquals(_client.kwargs, { 'stream': True, 'auth': ('user', 'pass') }) @istest def archive_get_can_fail(self): """Reading archive can fail for some reasons """ response = StreamedResponse(ok=False, stream=None) _client = FakeRequestClientGet(response) - deposit_client = DepositClient(config=CLIENT_TEST_CONFIG, - _client=_client) + deposit_client = ApiDepositClient(config=CLIENT_TEST_CONFIG, + _client=_client) with self.assertRaisesRegex( ValueError, 'Problem when retrieving deposit archive'): deposit_client.archive_get('/some/url', 'some/path') class JsonResponse: """Json response facsimile """ def __init__(self, ok, response): self.ok = ok self.response = response def json(self): return self.response -class DepositClientReadMetadataTest(unittest.TestCase): +class ApiDepositClientReadMetadataTest(unittest.TestCase): @istest def metadata_get(self): """Reading archive should write data in temporary directory """ expected_response = {"some": "dict"} response = JsonResponse( ok=True, response=expected_response) _client = FakeRequestClientGet(response) - deposit_client = DepositClient(config=CLIENT_TEST_CONFIG, - _client=_client) + deposit_client = ApiDepositClient(config=CLIENT_TEST_CONFIG, + _client=_client) actual_metadata = deposit_client.metadata_get('/metadata') self.assertEquals(actual_metadata, expected_response) @istest def metadata_get_can_fail(self): """Reading metadata can fail for some reasons """ _client = FakeRequestClientGet(JsonResponse(ok=False, response=None)) - deposit_client = DepositClient(config=CLIENT_TEST_CONFIG, - _client=_client) + deposit_client = ApiDepositClient(config=CLIENT_TEST_CONFIG, + _client=_client) with self.assertRaisesRegex( ValueError, 'Problem when retrieving metadata at'): 
deposit_client.metadata_get('/some/metadata/url') class FakeRequestClientPut: """Fake Request client dedicated to put request method calls. """ args = None kwargs = None def put(self, *args, **kwargs): self.args = args self.kwargs = kwargs -class DepositClientStatusUpdateTest(unittest.TestCase): +class ApiDepositClientStatusUpdateTest(unittest.TestCase): @istest def status_update(self): """Update status """ _client = FakeRequestClientPut() - deposit_client = DepositClient(config=CLIENT_TEST_CONFIG, - _client=_client) + deposit_client = ApiDepositClient(config=CLIENT_TEST_CONFIG, + _client=_client) deposit_client.status_update('/update/status', DEPOSIT_STATUS_LOAD_SUCCESS, revision_id='some-revision-id') self.assertEquals(_client.args, ('http://nowhere:9000/update/status', )) self.assertEquals(_client.kwargs, { 'json': { 'status': DEPOSIT_STATUS_LOAD_SUCCESS, 'revision_id': 'some-revision-id', } }) @istest def status_update_with_no_revision_id(self): """Reading metadata can fail for some reasons """ _client = FakeRequestClientPut() - deposit_client = DepositClient(config=CLIENT_TEST_CONFIG, - _client=_client) + deposit_client = ApiDepositClient(config=CLIENT_TEST_CONFIG, + _client=_client) deposit_client.status_update('/update/status/fail', DEPOSIT_STATUS_LOAD_FAILURE) self.assertEquals(_client.args, ('http://nowhere:9000/update/status/fail', )) self.assertEquals(_client.kwargs, { 'json': { 'status': DEPOSIT_STATUS_LOAD_FAILURE, } }) -class DepositClientCheckTest(unittest.TestCase): +class ApiDepositClientCheckTest(unittest.TestCase): @istest def check(self): """When check ok, this should return the deposit's status """ _client = FakeRequestClientGet( JsonResponse(ok=True, response={'status': 'something'})) - deposit_client = DepositClient(config=CLIENT_TEST_CONFIG, - _client=_client) + deposit_client = ApiDepositClient(config=CLIENT_TEST_CONFIG, + _client=_client) r = deposit_client.check('/check') self.assertEquals(_client.args, ('http://nowhere:9000/check', )) 
self.assertEquals(_client.kwargs, {}) self.assertEquals(r, 'something') @istest def check_fails(self): """Checking deposit can fail for some reason """ _client = FakeRequestClientGet( JsonResponse(ok=False, response=None)) - deposit_client = DepositClient(config=CLIENT_TEST_CONFIG, - _client=_client) + deposit_client = ApiDepositClient(config=CLIENT_TEST_CONFIG, + _client=_client) with self.assertRaisesRegex( ValueError, 'Problem when checking deposit'): deposit_client.check('/check/fails') self.assertEquals(_client.args, ('http://nowhere:9000/check/fails', )) self.assertEquals(_client.kwargs, {}) diff --git a/version.txt b/version.txt index a7ff6e57..4c592f92 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.43-0-gcb97e84 \ No newline at end of file +v0.0.44-0-g6520c50 \ No newline at end of file