diff --git a/docs/endpoints/collection.rst b/docs/endpoints/collection.rst new file mode 100644 index 00000000..7debb7f2 --- /dev/null +++ b/docs/endpoints/collection.rst @@ -0,0 +1,73 @@ +Create deposit +^^^^^^^^^^^^^^^ + +.. http:post:: /1// + + Create deposit in a collection. + + The client sends a deposit request to a specific collection with: + + * an archive holding the software source code (binary upload) + * an envelope with metadata describing information regarding a deposit (atom + entry deposit) + + Also known as: COL-IRI + + :param text : the client's credentials + :param text Content-Type: accepted mimetype + :param int Content-Length: tarball size + :param text Content-MD5: md5 checksum hex encoded of the tarball + :param text Content-Disposition: attachment; filename=[filename]; the filename + parameter must be text (ascii) + :param text Content-Disposition: for the metadata file set name parameter + to 'atom'. + :param bool In-Progress: true if not final; false when final request. + :statuscode 201: success for deposit on POST + :statuscode 401: Unauthorized + :statuscode 404: access to an unknown collection + :statuscode 415: unsupported media type + +Sample request +~~~~~~~~~~~~~~~ +.. code:: shell + + curl -i -u hal: \ + -F "file=@../deposit.json;type=application/zip;filename=payload" \ + -F "atom=@../atom-entry.xml;type=application/atom+xml;charset=UTF-8" \ + -H 'In-Progress: false' \ + -H 'Slug: some-external-id' \ + -XPOST https://deposit.softwareheritage.org/1/hal/ + +Sample response +~~~~~~~~~~~~~~~ + +.. code:: shell + + HTTP/1.0 201 Created + Date: Tue, 26 Sep 2017 10:32:35 GMT + Server: WSGIServer/0.2 CPython/3.5.3 + Vary: Accept, Cookie + Allow: GET, POST, PUT, DELETE, HEAD, OPTIONS + Location: /1/hal/10/metadata/ + X-Frame-Options: SAMEORIGIN + Content-Type: application/xml + + + 10 + Sept. 26, 2017, 10:32 a.m. 
+ None + deposited + + + + + + + + + + + http://purl.org/net/sword/package/SimpleZip + diff --git a/docs/endpoints/content.rst b/docs/endpoints/content.rst new file mode 100644 index 00000000..ef89d1e9 --- /dev/null +++ b/docs/endpoints/content.rst @@ -0,0 +1,14 @@ +Display content +^^^^^^^^^^^^^^^^ + +.. http:get:: /1///content/ + + Display information on the content's representation in the sword + server. + + + Also known as: CONT-FILE-IRI + + :param text : the client's credentials + :statuscode 200: no error + :statuscode 401: Unauthorized diff --git a/docs/endpoints/service-document.rst b/docs/endpoints/service-document.rst new file mode 100644 index 00000000..97a7af19 --- /dev/null +++ b/docs/endpoints/service-document.rst @@ -0,0 +1,48 @@ +Service document +^^^^^^^^^^^^^^^^^ + +.. http:get:: /1/servicedocument/ + + This is the starting endpoint for the client to discover its initial + collection. The answer to this query will describe: + + * the server's abilities + * connected client's collection information + + Also known as: SD-IRI - The Service Document IRI + + :param text : the client's credentials + :statuscode 200: no error + :statuscode 401: Unauthorized + + + +Sample response +~~~~~~~~~~~~~~~ + .. code:: xml + + + + + 2.0 + 20971520 + + + The Software Heritage (SWH) archive + + SWH Software Archive + application/zip + application/x-tar + Collection Policy + Software Heritage Archive + false + false + Collect, Preserve, Share + http://purl.org/net/sword/package/SimpleZip + https://deposit.softwareheritage.org/1/hal/ + + + diff --git a/docs/endpoints/status.rst b/docs/endpoints/status.rst new file mode 100644 index 00000000..c6e4f664 --- /dev/null +++ b/docs/endpoints/status.rst @@ -0,0 +1,29 @@ +Retrieve status +^^^^^^^^^^^^^^^^ + +.. http:get:: /1/// + + Display deposit's status in regards to loading. 
+ + + The different statuses: + + - **partial**: multipart deposit is still ongoing + - **deposited**: deposit completed + - **rejected**: deposit failed the checks + - **verified**: content and metadata verified + - **loading**: loading in-progress + - **done**: loading completed successfully + - **failed**: the deposit loading has failed + + Also known as STATE-IRI + + :param text : the client's credentials + :statuscode 201: with the deposit's status + :statuscode 401: Unauthorized + :statuscode 404: access to an unknown deposit + + + +Sample response +~~~~~~~~~~~~~~~ diff --git a/docs/endpoints/update-media.rst b/docs/endpoints/update-media.rst new file mode 100644 index 00000000..de32634c --- /dev/null +++ b/docs/endpoints/update-media.rst @@ -0,0 +1,27 @@ +Update content +^^^^^^^^^^^^^^^ + +.. http:post:: /1///media/ + + Add archive(s) to a deposit. Only possible if the deposit's status + is partial. + +.. http:put:: /1///media/ + + Replace all content by submitting a new archive. Only possible if + the deposit's status is partial. + + + Also known as: *update iri* (EM-IRI) + + :param text : the client's credentials + :param text Content-Type: accepted mimetype + :param int Content-Length: tarball size + :param text Content-MD5: md5 checksum hex encoded of the tarball + :param text Content-Disposition: attachment; filename=[filename] ; the filename + parameter must be text (ascii) + :param bool In-progress: true if not final; false when final request. + :statuscode 204: success without payload on PUT + :statuscode 201: success for deposit on POST + :statuscode 401: Unauthorized + :statuscode 415: unsupported media type diff --git a/docs/endpoints/update-metadata.rst b/docs/endpoints/update-metadata.rst new file mode 100644 index 00000000..661d7516 --- /dev/null +++ b/docs/endpoints/update-metadata.rst @@ -0,0 +1,24 @@ +Update metadata +^^^^^^^^^^^^^^^^ + +.. http:post:: /1///metadata/ + + Add metadata to a deposit. 
Only possible if the deposit's status + is partial. + +.. http:put:: /1///metadata/ + + Replace all metadata by submitting a new metadata file. Only possible if + the deposit's status is partial. + + + Also known as: *update iri* (SE-IRI) + + :param text : the client's credentials + :param text Content-Disposition: attachment; filename=[filename] ; the filename + parameter must be text (ascii), with a name parameter set to 'atom'. + :param bool In-progress: true if not final; false when final request. + :statuscode 204: success without payload on PUT + :statuscode 201: success for deposit on POST + :statuscode 401: Unauthorized + :statuscode 415: unsupported media type diff --git a/docs/getting-started.rst b/docs/getting-started.rst index e6c5ecb5..d6288ac0 100644 --- a/docs/getting-started.rst +++ b/docs/getting-started.rst @@ -1,291 +1,305 @@ Getting Started =============== This is a guide for how to prepare and push a software deposit with the swh-deposit commands. -The api is rooted at https://deposit.softwareheritage.org. +The api is rooted at https://deposit.softwareheritage.org/1. For more details, see the `main documentation <./index.html>`__. Requirements ------------ You need to be referenced on SWH's client list to have: * credentials (needed for the basic authentication step) - in this document we reference ```` as the client's name and - ```` as its associated authentication password. + ```` as its associated authentication password. - * an associated collection +* an associated collection `Contact us for more information. `__ Prepare a deposit ----------------- * compress the files in a supported archive format: - zip: common zip archive (no multi-disk zip files). 
- tar: tar archive without compression or optionally any of the following compression algorithm gzip (.tar.gz, .tgz), bzip2 (.tar.bz2) , or lzma (.tar.lzma) * prepare a metadata file (`more details <./metadata.html>`__.): - specify metadata schema/vocabulary (CodeMeta is recommended) - specify *MUST* metadata (url, authors, software name and the external\_identifier) - add all available information under the compatible metadata term An example of an atom entry file with CodeMeta terms: .. code:: xml - - - Je suis GPL - 12345 - forge.softwareheritage.org/source/jesuisgpl/ - Yes, this is another implementation of - "Hello, world!” when you run it. - - GPL - https://www.gnu.org/licenses/gpl.html - - - Reuben Thomas - Maintainer - - - Sami Kerola - Maintainer - - + + + Je suis GPL + swh + je-suis-gpl + https://forge.softwareheritage.org/source/jesuisgpl/ + 2018-01-05 + Je suis GPL is a modified version of GNU Hello whose + sole purpose is to showcase the usage of + Software Heritage for license compliance purposes. + 0.1 + GNU/Linux + stable + C + + + GNU General Public License v3.0 or later + https://spdx.org/licenses/GPL-3.0-or-later.html + + + Stefano Zacchiroli + Maintainer + + + Push deposit ------------ You can push a deposit with: -* a one single deposit (archive + metadata): +* a single deposit (archive + metadata): The user posts in one query a software source code archive and associated metadata. The deposit is directly marked with status ``deposited``. + * a multisteps deposit: 1. Create an incomplete deposit (marked with status ``partial``) 2. Add data to a deposit (in multiple requests if needed) 3. 
Finalize deposit (the status becomes ``deposited``) Single deposit ^^^^^^^^^^^^^^ Once the files are ready for deposit, we want to do the actual deposit in one shot, sending exactly one POST query: * 1 archive (content-type ``application/zip`` or ``application/x-tar``) * 1 metadata file in atom xml format (``content-type: application/atom+xml;type=entry``) For this, we need to provide the: * arguments: ``--username 'name' --password 'pass'`` as credentials * archive's path (example: ``--archive path/to/archive-name.tgz``) * (optionally) metadata file's path ``--metadata path/to/file.metadata.xml``. If not provided, the archive's filename will be used to determine the metadata file, e.g.: ``path/to/archive-name.tgz.metadata.xml`` * (optionally) ``--slug 'your-id'`` argument, a reference to a unique identifier the client uses for the software object. You can do this with the following command: minimal deposit .. code:: shell $ swh-deposit --username name --password secret \ --archive je-suis-gpl.tgz with client's external identifier (``slug``) .. code:: shell $ swh-deposit --username name --password secret \ --archive je-suis-gpl.tgz \ - --slug 123456 + --slug je-suis-gpl to a specific client's collection .. code:: shell $ swh-deposit --username name --password secret \ --archive je-suis-gpl.tgz \ --collection 'second-collection' You just posted a deposit to your collection on Software Heritage. If everything went well, the successful response will contain the elements below: .. code:: shell { 'deposit_status': 'deposited', 'deposit_id': '7', 'deposit_date': 'Jan. 29, 2018, 12:29 p.m.' } Note: As the deposit is in ``deposited`` status, you can no longer update the deposit after this query. It will be answered with a 403 forbidden answer. If something went wrong, an equivalent response will be given with the `error` and `detail` keys explaining the issue, e.g.: .. 
code:: shell { 'error': 'Unknown collection name xyz', 'detail': None, 'deposit_status': None, 'deposit_status_detail': None, 'deposit_swh_id': None, 'status': 404 } multisteps deposit ^^^^^^^^^^^^^^^^^^^^^^^^^ The steps to create a multisteps deposit: 1. Create an incomplete deposit ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ First use the ``--partial`` argument to declare there is more to come: .. code:: shell - $ swh-deposit --username name --password secret --partial \ - --archive foo.tar.gz + $ swh-deposit --username name --password secret \ + --archive foo.tar.gz \ + --partial 2. Add content or metadata to the deposit ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Continue the deposit by using the ``--deposit-id`` argument given as a response for the first step. You can continue adding content or metadata while you use the ``--partial`` argument. .. code:: shell - $ swh-deposit --username name --password secret --partial \ + $ swh-deposit --username name --password secret \ --archive add-foo.tar.gz \ - --deposit-id 42 + --deposit-id 42 \ + --partial In case you want to add only one new archive without metadata: .. code:: shell - $ swh-deposit --username name --password secret --partial \ + $ swh-deposit --username name --password secret \ --archive add-foo.tar.gz \ - --archive-deposit - --deposit-id 42 + --archive-deposit \ + --deposit-id 42 \ + --partial If you want to add only metadata, use: .. code:: shell - $ swh-deposit --username name --password secret --partial \ + $ swh-deposit --username name --password secret \ --metadata add-foo.tar.gz.metadata.xml \ - --metadata-deposit - --deposit-id 42 + --metadata-deposit \ + --deposit-id 42 \ + --partial 3. Finalize deposit ~~~~~~~~~~~~~~~~~~~ On your last addition, by not declaring it as ``--partial``, the deposit will be considered as completed and its status will be changed to ``deposited``. 
Update deposit ---------------- * replace deposit: - only possible if the deposit status is ``partial`` and ``--deposit-id <id>`` is provided + - by using the ``--replace`` flag + - ``--metadata-deposit`` replaces associated existing metadata - ``--archive-deposit`` replaces associated archive(s) - by default, with no flag or both, you'll replace associated metadata and archive(s) .. code:: shell - $ swh-deposit --username name --password secret --replace\ + $ swh-deposit --username name --password secret \ --deposit-id 11 \ - --archive updated-je-suis-gpl.tar.gz + --archive updated-je-suis-gpl.tgz \ + --replace * update a loaded deposit with a new version: - by using the external-id with the ``--slug`` argument, you will link the new deposit with its parent deposit .. code:: shell - $ swh-deposit --username name --password secret --slug '123456' \ - --archive je-suis-gpl-v2.tgz + $ swh-deposit --username name --password secret \ + --archive je-suis-gpl-v2.tgz \ + --slug 'je-suis-gpl' Check the deposit's status -------------------------- You can check the status of the deposit by using the ``--deposit-id`` argument: .. code:: shell $ swh-deposit --username name --password secret --deposit-id '11' --status .. 
code:: json { 'deposit_id': '11', 'deposit_status': 'deposited', 'deposit_swh_id': None, 'deposit_status_detail': 'Deposit is ready for additional checks \ (tarball ok, metadata, etc...)' } The different statuses: -- *partial* : multipart deposit is still ongoing -- *deposited*: deposit completed -- *rejected*: deposit failed the checks -- *verified*: content and metadata verified -- *loading*: loading in-progress -- *done*: loading completed successfully -- *failed*: the deposit loading has failed +- **partial**: multipart deposit is still ongoing +- **deposited**: deposit completed +- **rejected**: deposit failed the checks +- **verified**: content and metadata verified +- **loading**: loading in-progress +- **done**: loading completed successfully +- **failed**: the deposit loading has failed When the deposit has been loaded into the archive, the status will be marked ``done``. The response will then also contain the ``deposit_swh_id``. For example: .. code:: json { 'deposit_id': '11', 'deposit_status': 'done', 'deposit_swh_id': 'swh:1:rev:34898aa991c90b447c27d2ac1fc09f5c8f12783e', 'deposit_status_detail': 'The deposit has been successfully \ loaded into the Software Heritage archive' } diff --git a/docs/index.rst b/docs/index.rst index 98965b86..23e304b5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,22 +1,21 @@ .. _swh-deposit: Software Heritage Deposit ========================= .. toctree:: :maxdepth: 1 :caption: Contents: getting-started.rst spec-api.rst metadata.rst - spec-loading.rst dev-info.rst sys-info.rst Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` diff --git a/docs/metadata.rst b/docs/metadata.rst index 543128a3..52d9cedc 100644 --- a/docs/metadata.rst +++ b/docs/metadata.rst @@ -1,187 +1,187 @@ Deposit metadata ================ When making a software deposit into the SWH archive, one can add information describing the software artifact and the software project. 
Metadata requirements --------------------- - **the schema/vocabulary** used *MUST* be specified with a persistent url (DublinCore, DOAP, CodeMeta, etc.) .. code:: xml or or - **the url** representing the location of the source *MUST* be provided under the url tag. The url will be used for creating an origin object in the archive. .. code:: xml www.url-example.com or www.url-example.com or www.url-example.com - **the external\_identifier** *MUST* be provided as an identifier - **the name** of the software deposit *MUST* be provided [atom:title, codemeta:name, dcterms:title] - **the authors** of the software deposit *MUST* be provided - **the external\_identifier** *SHOULD* match the Slug external-identifier in the header - **the description** of the software deposit *SHOULD* be provided - [codemeta:description] + [codemeta:description]: short or long description of the software -- short or long description of the software - **the license/s** of the software +- **the license/s** of the software deposit *SHOULD* be provided [codemeta:license] - other metadata *MAY* be added with terms defined by the schema in use. Examples -------- Using only Atom ~~~~~~~~~~~~~~~ .. code:: xml Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author Using Atom with CodeMeta ~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: xml Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 1785io25c695 origin url other identifier, DOI, ARK Domain description key-word 1 key-word 2 creation date publication date comment article name article id Collaboration/Projet project name id see also Sponsor A Sponsor B Platform/OS dependencies Version active license url spdx .Net Framework 3.0 Python2.3 author1 Inria UPMC author2 Inria UPMC http://code.com language 1 language 2 http://issuetracker.com Using Atom with DublinCore and CodeMeta (multi-schema entry) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: xml Awesome Compiler hal urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a %s hal-01587361 doi:10.5281/zenodo.438684 The assignment problem AffectationRO author [INFO] Computer Science [cs] [INFO.INFO-RO] Computer Science [cs]/Operations Research [cs.RO] SOFTWARE Project in OR: The assignment problemA java implementation for the assignment problem first release description fr 2015-06-01 2017-10-19 en origin url 1.0.0 key word Comment Référence interne link Sponsor Platform/OS dependencies Ended license url spdx http://code.com language 1 language 2 Note ---- We aim at harmonizing the metadata from different origins and thus metadata will be translated to the `CodeMeta v.2 `__ vocabulary if possible. diff --git a/docs/spec-api.rst b/docs/spec-api.rst index 07c84649..4a6b3cc2 100644 --- a/docs/spec-api.rst +++ b/docs/spec-api.rst @@ -1,750 +1,112 @@ API Specification ================= This is `Software Heritage `__'s `SWORD 2.0 `__ Server implementation. **S.W.O.R.D** (**S**\ imple **W**\ eb-Service **O**\ ffering **R**\ epository **D**\ eposit) is an interoperability standard for digital file deposit. This implementation will permit interaction between a client (a repository) and a server (SWH repository) to push deposits of software source code archives with associated metadata. *Note:* * In the following document, we will use ``archive`` and ``software source code archive`` interchangeably. * The supported archive formats are: * zip: common zip archive (no multi-disk zip files). * tar: tar archive without compression or optionally any of the following compression algorithms: gzip (.tar.gz, .tgz), bzip2 (.tar.bz2), or lzma (.tar.lzma) Collection ---------- SWORD defines a ``collection`` concept. In SWH's case, this collection refers to a group of deposits. A ``deposit`` is some form of software source code archive(s) associated with metadata. By default the client's collection will have the client's name. 
Limitations ----------- * upload limitation of 100Mib * no mediation API overview ------------ API access is over HTTPS. The API is protected through basic authentication. -The API endpoints are rooted at https://deposit.softwareheritage.org/1/. - -Data is sent and received as XML (as specified in the SWORD 2.0 -specification). Endpoints --------- -* ``/1/servicedocument/`` *service document iri* (a.k.a `SD-IRI - <#sd-iri-the-service-document-iri>`__) - - *Goal:* For a client to discover its collection's location - -* ``/1//`` *collection iri* (a.k.a `COL-IRI - <#col-iri-the-collection-iri>`__) - - *Goal:*: create deposit to a collection - -* ``/1///media/`` *update iri* (a.k.a - `EM-IRI <#em-iri-the-atom-edit-media-iri>`__) - - *Goal:*: Add or replace archive(s) to a deposit - -* ``/1///metadata/`` *update iri* (a.k.a `EDIT-IRI - <#edit-iri-the-atom-entry-edit-iri>`__ merged with `SE-IRI - <#se-iri-the-sword-edit-iri>`__) - - *Goal:*: Add or replace metadata (and optionally archive(s) to a deposit - -* ``/1///status/`` *state iri* (a.k.a `STATE-IRI - <#state-iri-the-sword-statement-iri>`__) - - *Goal:*: Display deposit's status in regards to loading - -* ``/1///content/`` *content iri* (a.k.a - `CONT-FILE-IRI <#cont-iri-the-content-iri>`__) - - *Goal:*: Display information on the content's representation in the sword - server - - -Service document request -~~~~~~~~~~~~~~~~~~~~~~~~ - -Endpoint: GET /1/servicedocument/ - -This is the starting endpoint for the client to discover its initial -collection. The answer to this query will describes: - -* the server's abilities -* connected client's collection information - - Also known as: `SD-IRI - The Service Document IRI - <#sd-iri-the-service-document-iri>`__. - -Sample request -^^^^^^^^^^^^^^ - -.. 
code:: shell - - GET https://deposit.softwareheritage.org/1/servicedocument/ HTTP/1.1 - Host: deposit.softwareheritage.org - -The server returns its abilities with the service document in xml format: - -* protocol sword version v2 -* accepted mime types: application/zip (zip), application/x-tar (tar archive - with any of the following optional compression algorithm gzip, bzip2, or - lzma) -* upload max size accepted. Beyond that point, it's expected the client splits - its tarball into multiple ones -* the collection the client can act upon (swh supports only one software - collection per client) -* mediation is not supported - -The current answer for example for the `HAL archive -`__ is: - -.. code:: xml - - - - - 2.0 - 20971520 - - - The Software Heritage (SWH) archive - - SWH Software Archive - application/zip - application/x-tar - Collection Policy - Software Heritage Archive - false - false - Collect, Preserve, Share - http://purl.org/net/sword/package/SimpleZip - https://deposit.softwareheritage.org/1/hal/ - - - - -Deposit creation/update -~~~~~~~~~~~~~~~~~~~~~~~ +The API endpoints are rooted at https://deposit.softwareheritage.org/1/. -The client can send deposit creation/update through a series of deposit -requests to the following endpoints: +Data is sent and received as XML (as specified in the SWORD 2.0 +specification). -* *collection iri* (COL-IRI) to initialize a deposit -* *update iris* (EM-IRI, EDIT-SE-IRI) to complete/finalize a deposit +.. include:: endpoints/service-document.rst -The deposit creation/update can also happens in one request. +.. include:: endpoints/collection.rst -The deposit request can contain: +.. include:: endpoints/update-media.rst -* an archive holding the software source code (binary upload) -* an envelop with metadata describing information regarding a deposit (atom - entry deposit) -* or both (multipart deposit, exactly one archive and one envelop). +.. 
include:: endpoints/update-metadata.rst -Request Types -^^^^^^^^^^^^^ +.. include:: endpoints/status.rst -Binary deposit -'''''''''''''' +.. include:: endpoints/content.rst -The client can deposit a binary archive, supplying the following -headers: -* Content-Type (text): accepted mimetype -* Content-Length (int): tarball size -* Content-MD5 (text): md5 checksum hex encoded of the tarball -* Content-Disposition (text): attachment; filename=[filename] ; the filename - parameter must be text (ascii) -* Packaging (IRI): http://purl.org/net/sword/package/SimpleZip -* In-Progress (bool): true to specify it's not the last request, false to - specify it's a final request and the server can go on with processing the - request's information (if not provided, this is considered false, so final). - -This is a single zip archive deposit. Almost no metadata is associated -with the archive except for the unique external identifier. - -*Note:* This kind of deposit should be ``partial`` (In-Progress: True) -as almost no metadata can be associated with the uploaded archive. - -API endpoints concerned -''''''''''''''''''''''' - -POST /1// Create a first deposit with one archive PUT /1///media/ -Replace existing archives POST /1///media/ Add new archive - -Sample request -'''''''''''''' - -.. code:: shell - - curl -i -u hal: \ - --data-binary @swh/deposit.zip \ - -H 'In-Progress: false' -H 'Content-MD5: 0faa1ecbf9224b9bf48a7c691b8c2b6f' \ - -H 'Content-Disposition: attachment; filename=[deposit.zip]' \ - -H 'Slug: some-external-id' \ - -H 'Packaging: http://purl.org/net/sword/package/SimpleZIP' \ - -H 'Content-type: application/zip' \ - -XPOST https://deposit.softwareheritage.org/1/hal/ - -Atom entry deposit -^^^^^^^^^^^^^^^^^^ - -The client can deposit an xml body holding metadata information on the -deposit. - -*Note:* This kind of deposit is mostly expected to be ``partial`` -(In-Progress: True) since no archive will be associated to those -metadata. 
- -API endpoints concerned -''''''''''''''''''''''' - -POST /1// Create a first atom deposit entry PUT /1///metadata/ Replace -existing metadata POST /1///metadata/ Add new metadata to deposit - -Sample request -'''''''''''''' - -Sample query: - -.. code:: shell - - curl -i -u hal: --data-binary @atom-entry.xml \ - -H 'In-Progress: false' \ - -H 'Slug: some-external-id' \ - -H 'Content-Type: application/atom+xml;type=entry' \ - -XPOST https://deposit.softwareheritage.org/1/hal/ - - HTTP/1.0 201 Created - Date: Tue, 26 Sep 2017 10:32:35 GMT - Server: WSGIServer/0.2 CPython/3.5.3 - Vary: Accept, Cookie - Allow: GET, POST, PUT, DELETE, HEAD, OPTIONS - Location: /1/hal/10/metadata/ - X-Frame-Options: SAMEORIGIN - Content-Type: application/xml - - - 10 - Sept. 26, 2017, 10:32 a.m. - None - deposited - - - - - - - - - - - http://purl.org/net/sword/package/SimpleZip - - -Sample body: - -.. code:: xml - - - Title - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2005-10-07T17:17:08Z - Contributor - The abstract - - - The abstract - Access Rights - Alternative Title - Date Available - Bibliographic Citation # noqa - Contributor - Description - Has Part - Has Version - Identifier - Is Part Of - Publisher - References - Rights Holder - Source - Title - Type - - - -One request deposit / Multipart deposit -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The one request deposit is a single request containing both the metadata -(as atom entry attachment) and the archive (as payload attachment). -Thus, it is a multipart deposit. - -Client provides: - -* Content-Disposition (text): header of type 'attachment' on the Entry Part - with a name parameter set to 'atom' -* Content-Disposition (text): header of type 'attachment' on the Media Part - with a name parameter set to payload and a filename parameter (the filename - will be expressed in ASCII). 
-* Content-MD5 (text): md5 checksum hex encoded of the tarball -* Packaging (text): http://purl.org/net/sword/package/SimpleZip (packaging - format used on the Media Part) -* In-Progress (bool): true\|false; true means ``partial`` upload and we can - expect other requests in the future, false means the deposit is done. -* add metadata formats or foreign markup to the atom:entry element - -API endpoints concerned -''''''''''''''''''''''' - -POST /1// Create a full deposit (metadata + archive) PUT /1///metadata/ -Replace existing metadata and archive POST /1///metadata/ Add new -metadata and archive to deposit - -Sample request -'''''''''''''' - -Sample query: - -.. code:: shell - - curl -i -u hal: \ - -F "file=@../deposit.json;type=application/zip;filename=payload" \ - -F "atom=@../atom-entry.xml;type=application/atom+xml;charset=UTF-8" \ - -H 'In-Progress: false' \ - -H 'Slug: some-external-id' \ - -XPOST https://deposit.softwareheritage.org/1/hal/ - - HTTP/1.0 201 Created - Date: Tue, 26 Sep 2017 10:11:55 GMT - Server: WSGIServer/0.2 CPython/3.5.3 - Vary: Accept, Cookie - Allow: GET, POST, PUT, DELETE, HEAD, OPTIONS - Location: /1/hal/9/metadata/ - X-Frame-Options: SAMEORIGIN - Content-Type: application/xml - - - 9 - Sept. 26, 2017, 10:11 a.m. - payload - deposited - - - - - - - - - - - http://purl.org/net/sword/package/SimpleZip - - -Sample content: - -.. 
code:: xml - - POST deposit HTTP/1.1 - Host: deposit.softwareheritage.org - Content-Length: [content length] - Content-Type: multipart/related; - boundary="===============1605871705=="; - type="application/atom+xml" - In-Progress: false - MIME-Version: 1.0 - - Media Post - --===============1605871705== - Content-Type: application/atom+xml; charset="utf-8" - Content-Disposition: attachment; name="atom" - MIME-Version: 1.0 - - - - Title - hal-or-other-archive-id - 2005-10-07T17:17:08Z - Contributor - - - The abstract - Access Rights - Alternative Title - Date Available - Bibliographic Citation # noqa - Contributor - Description - Has Part - Has Version - Identifier - Is Part Of - Publisher - References - Rights Holder - Source - Title - Type - - --===============1605871705== - Content-Type: application/zip - Content-Disposition: attachment; name=payload; filename=[filename] - Packaging: http://purl.org/net/sword/package/SimpleZip - Content-MD5: [md5-digest] - MIME-Version: 1.0 - - [...binary package data...] - --===============1605871705==-- - -Deposit Creation - server point of view ---------------------------------------- - -The server receives the request(s) and does minimal checking on the -input prior to any saving operations. 
- -Validation of the header and body request -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Any kind of errors can happen, here is the list depending on the -situation: +Possible errors: +---------------- * common errors: * 401 (unauthenticated) if a client does not provide credential or provide wrong ones * 403 (forbidden) if a client tries access to a collection it does not own * 404 (not found) if a client tries access to an unknown collection * 404 (not found) if a client tries access to an unknown deposit * 415 (unsupported media type) if a wrong media type is provided to the endpoint * archive/binary deposit: * 403 (forbidden) if the length of the archive exceeds the max size configured * 412 (precondition failed) if the length or hash provided mismatch the reality of the archive. * 415 (unsupported media type) if a wrong media type is provided * multipart deposit: * 412 (precondition failed) if the md5 hash provided mismatch the reality of the archive * 415 (unsupported media type) if a wrong media type is provided * Atom entry deposit: * 400 (bad request) if the request's body is empty (for creation only) -[3\|5\|6.2] Server uploads the content in a temporary location -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Using an objstorage, the server stores the archive in a temporary -location. It's deemed temporary the time the deposit is completed -(status becomes ``deposited``) and the loading finishes. - -The server also persists requests' information in a database. - -[4] Servers answers the client -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If everything went well, the server answers either with a 200, 201 or -204 response (depending on the actual endpoint) - -A ``http 200`` response is returned for GET endpoints. - -A ``http 201 Created`` response is returned for POST endpoints. The body -holds the deposit receipt. The headers holds the EDIT-IRI in the -Location header of the response. 
- -A ``http 204 No Content`` response is returned for PUT, DELETE -endpoints. - -If something went wrong, the server answers with one of the `error -status code and associated message mentioned <#possible%20errors>`__). - -[5] Deposit Update -~~~~~~~~~~~~~~~~~~ - -The client previously deposited a ``partial`` document (through an -archive, metadata, or both). The client wants to update information for -that previous deposit (possibly in multiple steps as well). - -The important thing to note here is that, as long as the deposit is in -status ``partial``, the loading did not start. Thus, the client can -update information (replace or add new archive, new metadata, even -delete) for that same ``partial`` deposit. - -When the deposit status changes to ``deposited``, the client can no -longer change the deposit's information (a 403 will be returned in that -case). - -Then aggregation of all those deposit's information will later be used -for the actual loading. - -Providing the collection name, and the identifier of the previous -deposit id received from the deposit receipt, the client executes a POST -or PUT request on the *update iris*. - -After validation of the body request, the server: - -- uploads such content in a temporary location - -- answers the client an ``http 204 (No content)``. In the Location header of - the response lies an iri to permit further update. - -- Asynchronously, the server will inject the archive uploaded and the - associated metadata. An operation status endpoint *state iri* permits the - client to query the loading operation status. 
- -Possible update endpoints -^^^^^^^^^^^^^^^^^^^^^^^^^ - -PUT /1///media/ Replace existing archives for the deposit POST -/1///media/ Add new archives to the deposit PUT /1///metadata/ Replace -existing metadata (and possible archives) POST /1///metadata/ Add new -metadata - -[6] Deposit Removal -~~~~~~~~~~~~~~~~~~~ - -As long as the deposit's status remains ``partial``, it's possible to -remove the deposit entirely or remove only the deposit's archive(s). - -If the deposit has been removed, further querying that deposit will -return a *404* response. - -If the deposit's archive(s) has been removed, we can still ensue other -query to update that deposit. - -Operation Status -~~~~~~~~~~~~~~~~ - -Providing a collection name and a deposit id, the client asks the -operation status of a prior deposit. - -URL: GET /1///status/ - -This returns: - -* *201* response with the actual status -* *404* if the deposit does not exist (or no longer does) - - Possible errors ----------------- - -sword:ErrorContent -~~~~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/ErrorContent`` - -The supplied format is not the same as that identified in the Packaging -header and/or that supported by the server Associated HTTP - -Associated HTTP status: *415 (Unsupported Media Type)* - -sword:ErrorChecksumMismatch -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/ErrorChecksumMismatch`` - -Checksum sent does not match the calculated checksum. - -Associated HTTP status: *412 Precondition Failed* - -sword:ErrorBadRequest -~~~~~~~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/ErrorBadRequest`` - -Some parameters sent with the POST/PUT were not understood. - -Associated HTTP status: *400 Bad Request* - -sword:MediationNotAllowed -~~~~~~~~~~~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/MediationNotAllowed`` - -Used where a client has attempted a mediated deposit, but this is not -supported by the server. 
- -Associated HTTP status: *412 Precondition Failed* - -sword:MethodNotAllowed -~~~~~~~~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/MethodNotAllowed`` - -Used when the client has attempted one of the HTTP update verbs (POST, -PUT, DELETE) but the server has decided not to respond to such requests -on the specified resource at that time. - -Associated HTTP Status: *405 Method Not Allowed* - -sword:MaxUploadSizeExceeded -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/MaxUploadSizeExceeded`` - -Used when the client has attempted to supply to the server a file which -exceeds the server's maximum upload size limit - -Associated HTTP Status: *413 (Request Entity Too Large)* - -sword:Unauthorized -~~~~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/ErrorUnauthorized`` - -The access to the api is through authentication. - -Associated HTTP status: *401* - -sword:Forbidden -~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/ErrorForbidden`` - -The action is forbidden (access to another collection for example). - -Associated HTTP status: *403* - -Nomenclature ------------- - -SWORD uses IRI notion, Internationalized Resource Identifier. In this -chapter, we will describe SWH's IRIs. - -SD-IRI - The Service Document IRI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The Service Document IRI. This is the IRI from which the client can -discover its collection IRI. - -HTTP verbs supported: *GET* - -Col-IRI - The Collection IRI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The software collection associated to one user. - -The SWORD Collection IRI is the IRI to which the initial deposit will -take place, and which is listed in the Service Document. - -Following our previous example, this is: -https://deposit.softwareheritage.org/1/hal/. - -HTTP verbs supported: *POST* - -Cont-IRI - The Content IRI -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is the endpoint which permits the client to retrieve -representations of the object as it resides in the SWORD server. 
- -This will display information about the content and its associated -metadata. - -HTTP verbs supported: *GET* - -*Note:* We also refer to it as *Cont-File-IRI*. - -EM-IRI - The Atom Edit Media IRI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is the endpoint to upload other related archives for the same -deposit. - -It is used to change a ``partial`` deposit in regards of archives, in -particular: - -* replace existing archives with new ones -* add new archives -* delete archives from a deposit - -Example use case: A first archive to put exceeds the deposit's limit -size. The client can thus split the archives in multiple ones. Post a -first ``partial`` archive to the Col-IRI (with In-Progress: - -True). Then, in order to complete the deposit, POST the other remaining -archives to the EM-IRI (the last one with the In-Progress header to -False). - -HTTP verbs supported: *POST*, *PUT*, *DELETE* - -Edit-IRI - The Atom Entry Edit IRI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is the endpoint to change a ``partial`` deposit in regards of -metadata. In particular: - -* replace existing metadata (and archives) with new ones -* add new metadata (and archives) -* delete deposit - -HTTP verbs supported: *POST*, *PUT*, *DELETE* - -*Note:* We also refer to it as *Edit-SE-IRI*. - -SE-IRI - The SWORD Edit IRI -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The sword specification permits to merge this with EDIT-IRI, so we did. - -*Note:* We also refer to it as *Edit-SE-IRI*. - -State-IRI - The SWORD Statement IRI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This is the IRI which can be used to retrieve a description of the -object from the sword server, including the structure of the object and -its state. This will be used as the operation status endpoint. 
-HTTP verbs supported: *GET* Sources ------- * `SWORD v2 specification `__ * `arxiv documentation `__ * `Dataverse example `__ * `SWORD used on HAL `__ * `xml examples for CCSD `__ diff --git a/swh/deposit/client/cli.py b/swh/deposit/client/cli.py index ea7512b0..ee2ad582 100755 --- a/swh/deposit/client/cli.py +++ b/swh/deposit/client/cli.py @@ -1,296 +1,296 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Script to demonstrate software deposit scenario to https://deposit.softwareheritage.org. Use: python3 -m swh.deposit.client.cli --help Documentation: https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html # noqa """ import os import click import logging import uuid from . import PublicApiDepositClient class InputError(ValueError): """Input script error """ pass def generate_slug(prefix='swh-sample'): """Generate a slug (sample purposes). """ return '%s-%s' % (prefix, uuid.uuid4()) def parse_cli_options(username, password, archive, metadata, archive_deposit, metadata_deposit, collection, slug, partial, deposit_id, replace, url, status): """Parse the cli options and make sure the combination is acceptable. If not, an InputError exception is raised explaining the issue. By acceptable, we mean: - A multipart deposit (create or update) needs both an existing software archive and an existing metadata file - A binary deposit (create/update) needs an existing software archive - A metadata deposit (create/update) needs an existing metadata file - A deposit update needs a deposit_id to be provided This won't prevent all failure cases though. The remaining errors are already dealt with by the underlying api client.
Raises: InputError explaining the issue Returns: dict with the following keys: 'archive': the software archive to deposit 'username': username 'password': associated password 'metadata': the metadata file to deposit 'collection': the username's associated collection 'slug': the slug or external id identifying the deposit to make 'partial': if the deposit is partial or not 'client': instantiated class 'url': deposit's server main entry point 'deposit_id': optional deposit identifier 'replace': the replace flag (update by replacing existing metadata) """ if status and not deposit_id: raise InputError("Deposit id must be provided for status check") if status and deposit_id: # status is higher priority over deposit archive_deposit = False metadata_deposit = False archive = None metadata = None if archive_deposit and metadata_deposit: # too many flags used, remove redundant ones (-> multipart deposit) archive_deposit = False metadata_deposit = False if archive and not os.path.exists(archive): raise InputError('Software Archive %s must exist!' % archive) if archive and not metadata: metadata = '%s.metadata.xml' % archive if metadata_deposit: archive = None if archive_deposit: metadata = None if metadata_deposit and not metadata: raise InputError( "Metadata deposit filepath must be provided for metadata deposit") if metadata and not os.path.exists(metadata): raise InputError('Software Archive metadata %s must exist!' % metadata) if not status and not archive and not metadata: raise InputError( 'Please provide an actionable command.
See --help for more ' 'information.') if replace and not deposit_id: raise InputError( 'To update an existing deposit, you must provide its id') client = PublicApiDepositClient({ 'url': url, 'auth': { 'username': username, 'password': password }, }) if not collection: # retrieve user's collection sd_content = client.service_document() if 'error' in sd_content: raise InputError('Service document retrieval: %s' % ( sd_content['error'], )) collection = sd_content['collection'] if not slug: # generate slug slug = generate_slug() return { 'archive': archive, 'username': username, 'password': password, 'metadata': metadata, 'collection': collection, 'slug': slug, 'partial': partial, 'client': client, 'url': url, 'deposit_id': deposit_id, 'replace': replace, } def deposit_status(config, dry_run, log): """Delegate the deposit status check to the deposit client. Returns the client's answer, or an empty dict when dry_run is set. """ log.debug('Status deposit') client = config['client'] collection = config['collection'] deposit_id = config['deposit_id'] if not dry_run: r = client.deposit_status(collection, deposit_id, log) return r return {} def deposit_create(config, dry_run, log): """Delegate the deposit creation to the deposit client. Returns the client's answer, or an empty dict when dry_run is set. """ log.debug('Create deposit') client = config['client'] collection = config['collection'] archive_path = config['archive'] metadata_path = config['metadata'] slug = config['slug'] in_progress = config['partial'] if not dry_run: r = client.deposit_create(collection, slug, archive_path, metadata_path, in_progress, log) return r return {} def deposit_update(config, dry_run, log): """Delegate the deposit update to the deposit client. Returns the client's answer, or an empty dict when dry_run is set.
""" log.debug('Update deposit') client = config['client'] collection = config['collection'] deposit_id = config['deposit_id'] archive_path = config['archive'] metadata_path = config['metadata'] slug = config['slug'] in_progress = config['partial'] replace = config['replace'] if not dry_run: r = client.deposit_update(collection, deposit_id, slug, archive_path, metadata_path, in_progress, replace, log) return r return {} @click.command() @click.option('--username', required=1, help="(Mandatory) User's name") @click.option('--password', required=1, help="(Mandatory) User's associated password") @click.option('--archive', help='(Optional) Software archive to deposit') @click.option('--metadata', help="(Optional) Path to xml metadata file. If not provided, this will use a file named .metadata.xml") # noqa @click.option('--archive-deposit/--no-archive-deposit', default=False, help='(Optional) Software archive only deposit') @click.option('--metadata-deposit/--no-metadata-deposit', default=False, help='(Optional) Metadata only deposit') @click.option('--collection', help="(Optional) User's collection. If not provided, this will be fetched.") # noqa @click.option('--slug', help="""(Optional) External system information identifier. If not provided, it will be generated""") # noqa @click.option('--partial/--no-partial', default=False, help='(Optional) The deposit will be partial, other deposits will have to take place to finalize it.') # noqa @click.option('--deposit-id', default=None, help='(Optional) Update an existing partial deposit with its identifier') # noqa @click.option('--replace/--no-replace', default=False, help='(Optional) Update by replacing existing metadata to a deposit') # noqa -@click.option('--url', default='http://deposit.softwareheritage.org/1', +@click.option('--url', default='https://deposit.softwareheritage.org/1', help="(Optional) Deposit server api endpoint.
By default, https://deposit.softwareheritage.org/1") # noqa @click.option('--status/--no-status', default=False, help="(Optional) Deposit's status") @click.option('--dry-run/--no-dry-run', default=False, help='(Optional) No-op deposit') @click.option('--verbose/--no-verbose', default=False, help='Verbose mode') def main(username, password, archive=None, metadata=None, archive_deposit=False, metadata_deposit=False, collection=None, slug=None, partial=False, deposit_id=None, replace=False, status=False, url='https://deposit.softwareheritage.org/1', dry_run=True, verbose=False): """Software Heritage Deposit client - Create (or update partial) deposit through the command line. More documentation can be found at https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html. """ log = logging.getLogger('swh-deposit') log.addHandler(logging.StreamHandler()) _loglevel = logging.DEBUG if verbose else logging.INFO log.setLevel(_loglevel) if dry_run: log.info("**DRY RUN**") config = {} try: log.debug('Parsing cli options') config = parse_cli_options( username, password, archive, metadata, archive_deposit, metadata_deposit, collection, slug, partial, deposit_id, replace, url, status) except InputError as e: msg = 'Problem during parsing options: %s' % e r = { 'error': msg, } log.info(r) return 1 if verbose: log.info("Parsed configuration: %s" % ( config, )) deposit_id = config['deposit_id'] if status and deposit_id: r = deposit_status(config, dry_run, log) elif not status and deposit_id: r = deposit_update(config, dry_run, log) elif not status and not deposit_id: r = deposit_create(config, dry_run, log) log.info(r) if __name__ == '__main__': main()