diff --git a/PKG-INFO b/PKG-INFO index 1346caef..16421bd2 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.deposit -Version: 0.0.47 +Version: 0.0.48 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/docs/endpoints/collection.rst b/docs/endpoints/collection.rst new file mode 100644 index 00000000..7debb7f2 --- /dev/null +++ b/docs/endpoints/collection.rst @@ -0,0 +1,73 @@ +Create deposit +^^^^^^^^^^^^^^^ + +.. http:post:: /1// + + Create deposit in a collection. + + The client sends a deposit request to a specific collection with: + + * an archive holding the software source code (binary upload) + * an envelope with metadata describing information regarding a deposit (atom + entry deposit) + + Also known as: COL-IRI + + :param text : the client's credentials + :param text Content-Type: accepted mimetype + :param int Content-Length: tarball size + :param text Content-MD5: md5 checksum hex encoded of the tarball + :param text Content-Disposition: attachment; filename=[filename]; the filename + parameter must be text (ascii) + :param text Content-Disposition: for the metadata file set name parameter + to 'atom'. + :param bool In-progress: true if not final; false when final request. + :statuscode 201: success for deposit on POST + :statuscode 401: Unauthorized + :statuscode 404: access to an unknown collection + :statuscode 415: unsupported media type + +Sample request +~~~~~~~~~~~~~~~ +.. code:: shell + + curl -i -u hal: \ + -F "file=@../deposit.json;type=application/zip;filename=payload" \ + -F "atom=@../atom-entry.xml;type=application/atom+xml;charset=UTF-8" \ + -H 'In-Progress: false' \ + -H 'Slug: some-external-id' \ + -XPOST https://deposit.softwareheritage.org/1/hal/ + +Sample response +~~~~~~~~~~~~~~~ + +.. 
code:: shell + + HTTP/1.0 201 Created + Date: Tue, 26 Sep 2017 10:32:35 GMT + Server: WSGIServer/0.2 CPython/3.5.3 + Vary: Accept, Cookie + Allow: GET, POST, PUT, DELETE, HEAD, OPTIONS + Location: /1/hal/10/metadata/ + X-Frame-Options: SAMEORIGIN + Content-Type: application/xml + + + 10 + Sept. 26, 2017, 10:32 a.m. + None + deposited + + + + + + + + + + + http://purl.org/net/sword/package/SimpleZip + diff --git a/docs/endpoints/content.rst b/docs/endpoints/content.rst new file mode 100644 index 00000000..ef89d1e9 --- /dev/null +++ b/docs/endpoints/content.rst @@ -0,0 +1,14 @@ +Display content +^^^^^^^^^^^^^^^^ + +.. http:get:: /1///content/ + + Display information on the content's representation in the sword + server. + + + Also known as: CONT-FILE-IRI + + :param text : the client's credentials + :statuscode 200: no error + :statuscode 401: Unauthorized diff --git a/docs/endpoints/service-document.rst b/docs/endpoints/service-document.rst new file mode 100644 index 00000000..97a7af19 --- /dev/null +++ b/docs/endpoints/service-document.rst @@ -0,0 +1,48 @@ +Service document +^^^^^^^^^^^^^^^^^ + +.. http:get:: /1/servicedocument/ + + This is the starting endpoint for the client to discover its initial + collection. The answer to this query will describe: + + * the server's abilities + * connected client's collection information + + Also known as: SD-IRI - The Service Document IRI + + :param text : the client's credentials + :statuscode 200: no error + :statuscode 401: Unauthorized + + + +Sample response +~~~~~~~~~~~~~~~ + .. 
code:: xml + + + + + 2.0 + 20971520 + + + The Software Heritage (SWH) archive + + SWH Software Archive + application/zip + application/x-tar + Collection Policy + Software Heritage Archive + false + false + Collect, Preserve, Share + http://purl.org/net/sword/package/SimpleZip + https://deposit.softwareheritage.org/1/hal/ + + + diff --git a/docs/endpoints/status.rst b/docs/endpoints/status.rst new file mode 100644 index 00000000..c6e4f664 --- /dev/null +++ b/docs/endpoints/status.rst @@ -0,0 +1,29 @@ +Retrieve status +^^^^^^^^^^^^^^^^ + +.. http:get:: /1/// + + Display deposit's status with regard to loading. + + + The different statuses: + + - **partial**: multipart deposit is still ongoing + - **deposited**: deposit completed + - **rejected**: deposit failed the checks + - **verified**: content and metadata verified + - **loading**: loading in-progress + - **done**: loading completed successfully + - **failed**: the deposit loading has failed + + Also known as: STATE-IRI + + :param text : the client's credentials + :statuscode 201: with the deposit's status + :statuscode 401: Unauthorized + :statuscode 404: access to an unknown deposit + + + +Sample response +~~~~~~~~~~~~~~~ diff --git a/docs/endpoints/update-media.rst b/docs/endpoints/update-media.rst new file mode 100644 index 00000000..de32634c --- /dev/null +++ b/docs/endpoints/update-media.rst @@ -0,0 +1,27 @@ +Update content +^^^^^^^^^^^^^^^ + +.. http:post:: /1///media/ + + Add archive(s) to a deposit. Only possible if the deposit's status + is partial. + +.. http:put:: /1///media/ + + Replace all content by submitting a new archive. Only possible if + the deposit's status is partial. 
+ + + Also known as: *update iri* (EM-IRI) + + :param text : the client's credentials + :param text Content-Type: accepted mimetype + :param int Content-Length: tarball size + :param text Content-MD5: md5 checksum hex encoded of the tarball + :param text Content-Disposition: attachment; filename=[filename] ; the filename + parameter must be text (ascii) + :param bool In-progress: true if not final; false when final request. + :statuscode 204: success without payload on PUT + :statuscode 201: success for deposit on POST + :statuscode 401: Unauthorized + :statuscode 415: unsupported media type diff --git a/docs/endpoints/update-metadata.rst b/docs/endpoints/update-metadata.rst new file mode 100644 index 00000000..661d7516 --- /dev/null +++ b/docs/endpoints/update-metadata.rst @@ -0,0 +1,24 @@ +Update metadata +^^^^^^^^^^^^^^^^ + +.. http:post:: /1///metadata/ + + Add metadata to a deposit. Only possible if the deposit's status + is partial. + +.. http:put:: /1///metadata/ + + Replace all metadata by submitting a new metadata file. Only possible if + the deposit's status is partial. + + + Also known as: *update iri* (SE-IRI) + + :param text : the client's credentials + :param text Content-Disposition: attachment; filename=[filename] ; the filename + parameter must be text (ascii), with a name parameter set to 'atom'. + :param bool In-progress: true if not final; false when final request. + :statuscode 204: success without payload on PUT + :statuscode 201: success for deposit on POST + :statuscode 401: Unauthorized + :statuscode 415: unsupported media type diff --git a/docs/getting-started.rst b/docs/getting-started.rst index e6c5ecb5..d6288ac0 100644 --- a/docs/getting-started.rst +++ b/docs/getting-started.rst @@ -1,291 +1,305 @@ Getting Started =============== This is a guide for how to prepare and push a software deposit with the swh-deposit commands. -The api is rooted at https://deposit.softwareheritage.org. 
+The api is rooted at https://deposit.softwareheritage.org/1. For more details, see the `main documentation <./index.html>`__. Requirements ------------ You need to be referenced on SWH's client list to have: * credentials (needed for the basic authentication step) - in this document we reference ```` as the client's name and - ```` as its associated authentication password. + ```` as its associated authentication password. - * an associated collection +* an associated collection `Contact us for more information. `__ Prepare a deposit ----------------- * compress the files in a supported archive format: - zip: common zip archive (no multi-disk zip files). - tar: tar archive without compression or optionally any of the following compression algorithm gzip (.tar.gz, .tgz), bzip2 (.tar.bz2) , or lzma (.tar.lzma) * prepare a metadata file (`more details <./metadata.html>`__.): - specify metadata schema/vocabulary (CodeMeta is recommended) - specify *MUST* metadata (url, authors, software name and the external\_identifier) - add all available information under the compatible metadata term An example of an atom entry file with CodeMeta terms: .. code:: xml - - - Je suis GPL - 12345 - forge.softwareheritage.org/source/jesuisgpl/ - Yes, this is another implementation of - "Hello, world!” when you run it. - - GPL - https://www.gnu.org/licenses/gpl.html - - - Reuben Thomas - Maintainer - - - Sami Kerola - Maintainer - - + + + Je suis GPL + swh + je-suis-gpl + https://forge.softwareheritage.org/source/jesuisgpl/ + 2018-01-05 + Je suis GPL is a modified version of GNU Hello whose + sole purpose is to showcase the usage of + Software Heritage for license compliance purposes. 
+ 0.1 + GNU/Linux + stable + C + + + GNU General Public License v3.0 or later + https://spdx.org/licenses/GPL-3.0-or-later.html + + + Stefano Zacchiroli + Maintainer + + + Push deposit ------------ You can push a deposit with: -* a one single deposit (archive + metadata): +* a single deposit (archive + metadata): The user posts in one query a software source code archive and associated metadata. The deposit is directly marked with status ``deposited``. + * a multisteps deposit: 1. Create an incomplete deposit (marked with status ``partial``) 2. Add data to a deposit (in multiple requests if needed) 3. Finalize deposit (the status becomes ``deposited``) Single deposit ^^^^^^^^^^^^^^ Once the files are ready for deposit, we want to do the actual deposit in one shot, sending exactly one POST query: * 1 archive (content-type ``application/zip`` or ``application/x-tar``) * 1 metadata file in atom xml format (``content-type: application/atom+xml;type=entry``) For this, we need to provide the: * arguments: ``--username 'name' --password 'pass'`` as credentials * archive's path (example: ``--archive path/to/archive-name.tgz``) : * (optionally) metadata file's path ``--metadata path/to/file.metadata.xml``. If not provided, the archive's filename will be used to determine the metadata file, e.g: ``path/to/archive-name.tgz.metadata.xml`` * (optionally) ``--slug 'your-id'`` argument, a reference to a unique identifier the client uses for the software object. You can do this with the following command: minimal deposit .. code:: shell $ swh-deposit ---username name --password secret \ --archive je-suis-gpl.tgz with client's external identifier (``slug``) .. code:: shell $ swh-deposit --username name --password secret \ --archive je-suis-gpl.tgz \ - --slug 123456 + --slug je-suis-gpl to a specific client's collection .. 
code:: shell $ swh-deposit --username name --password secret \ --archive je-suis-gpl.tgz \ --collection 'second-collection' You just posted a deposit to your collection on Software Heritage If everything went well, the successful response will contain the elements below: .. code:: shell { 'deposit_status': 'deposited', 'deposit_id': '7', 'deposit_date': 'Jan. 29, 2018, 12:29 p.m.' } Note: As the deposit is in ``deposited`` status, you can no longer update the deposit after this query. It will be answered with a 403 forbidden answer. If something went wrong, an equivalent response will be given with the `error` and `detail` keys explaining the issue, e.g.: .. code:: shell { 'error': 'Unknown collection name xyz', 'detail': None, 'deposit_status': None, 'deposit_status_detail': None, 'deposit_swh_id': None, 'status': 404 } multisteps deposit ^^^^^^^^^^^^^^^^^^^^^^^^^ The steps to create a multisteps deposit: 1. Create an incomplete deposit ~~~~~~~~~~~~~~~~~~~ First use the ``--partial`` argument to declare there is more to come .. code:: shell - $ swh-deposit --username name --password secret --partial \ - --archive foo.tar.gz + $ swh-deposit --username name --password secret \ + --archive foo.tar.gz \ + --partial 2. Add content or metadata to the deposit ~~~~~~~~~~~~~~~~~~~ Continue the deposit by using the ``--deposit-id`` argument given as a response for the first step. You can continue adding content or metadata while you use the ``--partial`` argument. .. code:: shell - $ swh-deposit --username name --password secret --partial \ + $ swh-deposit --username name --password secret \ --archive add-foo.tar.gz \ - --deposit-id 42 + --deposit-id 42 \ + --partial In case you want to add only one new archive without metadata: .. 
code:: shell - $ swh-deposit --username name --password secret --partial \ + $ swh-deposit --username name --password secret \ --archive add-foo.tar.gz \ - --archive-deposit - --deposit-id 42 + --archive-deposit \ + --deposit-id 42 \ + --partial If you want to add only metadata, use: .. code:: shell - $ swh-deposit --username name --password secret --partial \ + $ swh-deposit --username name --password secret \ --metadata add-foo.tar.gz.metadata.xml \ - --metadata-deposit - --deposit-id 42 + --metadata-deposit \ + --deposit-id 42 \ + --partial 3. Finalize deposit ~~~~~~~~~~~~~~~~~~~ On your last addition, by not declaring it as ``--partial``, the deposit will be considered as completed and its status will be changed to ``deposited``. Update deposit ---------------- * replace deposit: - only possible if the deposit status is ``partial`` and ``--deposit-id `` is provided + - by using the ``--replace`` flag + - ``--metadata-deposit`` replaces associated existing metadata - ``--archive-deposit`` replaces associated archive(s) - by default, with no flag or both, you'll replace associated metadata and archive(s) .. code:: shell - $ swh-deposit --username name --password secret --replace\ + $ swh-deposit --username name --password secret \ --deposit-id 11 \ - --archive updated-je-suis-gpl.tar.gz + --archive updated-je-suis-gpl.tgz \ + --replace * update a loaded deposit with a new version: - by using the external-id with the ``--slug`` argument, you will link the new deposit with its parent deposit .. code:: shell - $ swh-deposit --username name --password secret --slug '123456' \ - --archive je-suis-gpl-v2.tgz + $ swh-deposit --username name --password secret \ + --archive je-suis-gpl-v2.tgz \ + --slug 'je-suis-gpl' Check the deposit's status -------------------------- You can check the status of the deposit by using the ``--deposit-id`` argument: .. code:: shell $ swh-deposit --username name --password secret --deposit-id '11' --status .. 
code:: json { 'deposit_id': '11', 'deposit_status': 'deposited', 'deposit_swh_id': None, 'deposit_status_detail': 'Deposit is ready for additional checks \ (tarball ok, metadata, etc...)' } The different statuses: -- *partial* : multipart deposit is still ongoing -- *deposited*: deposit completed -- *rejected*: deposit failed the checks -- *verified*: content and metadata verified -- *loading*: loading in-progress -- *done*: loading completed successfully -- *failed*: the deposit loading has failed +- **partial**: multipart deposit is still ongoing +- **deposited**: deposit completed +- **rejected**: deposit failed the checks +- **verified**: content and metadata verified +- **loading**: loading in-progress +- **done**: loading completed successfully +- **failed**: the deposit loading has failed When the deposit has been loaded into the archive, the status will be marked ``done``. In the response, will also be available the . For example: .. code:: json { 'deposit_id': '11', 'deposit_status': 'done', 'deposit_swh_id': 'swh:1:rev:34898aa991c90b447c27d2ac1fc09f5c8f12783e', 'deposit_status_detail': 'The deposit has been successfully \ loaded into the Software Heritage archive' } diff --git a/docs/index.rst b/docs/index.rst index 98965b86..23e304b5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,22 +1,21 @@ .. _swh-deposit: Software Heritage Deposit ========================= .. toctree:: :maxdepth: 1 :caption: Contents: getting-started.rst spec-api.rst metadata.rst - spec-loading.rst dev-info.rst sys-info.rst Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` diff --git a/docs/metadata.rst b/docs/metadata.rst index 543128a3..52d9cedc 100644 --- a/docs/metadata.rst +++ b/docs/metadata.rst @@ -1,187 +1,187 @@ Deposit metadata ================ When making a software deposit into the SWH archive, one can add information describing the software artifact and the software project. 
Metadata requirements --------------------- - **the schema/vocabulary** used *MUST* be specified with a persistent url (DublinCore, DOAP, CodeMeta, etc.) .. code:: xml or or - **the url** representing the location of the source *MUST* be provided under the url tag. The url will be used for creating an origin object in the archive. .. code:: xml www.url-example.com or www.url-example.com or www.url-example.com - **the external\_identifier** *MUST* be provided as an identifier - **the name** of the software deposit *MUST* be provided [atom:title, codemeta:name, dcterms:title] - **the authors** of the software deposit *MUST* be provided - **the external\_identifier** *SHOULD* match the Slug external-identifier in the header - **the description** of the software deposit *SHOULD* be provided - [codemeta:description] + [codemeta:description]: short or long description of the software -- short or long description of the software - **the license/s** of the software +- **the license/s** of the software deposit *SHOULD* be provided [codemeta:license] - other metadata *MAY* be added with terms defined by the schema in use. Examples -------- Using only Atom ~~~~~~~~~~~~~~~ .. code:: xml Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author Using Atom with CodeMeta ~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: xml Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 1785io25c695 origin url other identifier, DOI, ARK Domain description key-word 1 key-word 2 creation date publication date comment article name article id Collaboration/Projet project name id see also Sponsor A Sponsor B Platform/OS dependencies Version active license url spdx .Net Framework 3.0 Python2.3 author1 Inria UPMC author2 Inria UPMC http://code.com language 1 language 2 http://issuetracker.com Using Atom with DublinCore and CodeMeta (multi-schema entry) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: xml Awesome Compiler hal urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a %s hal-01587361 doi:10.5281/zenodo.438684 The assignment problem AffectationRO author [INFO] Computer Science [cs] [INFO.INFO-RO] Computer Science [cs]/Operations Research [cs.RO] SOFTWARE Project in OR: The assignment problemA java implementation for the assignment problem first release description fr 2015-06-01 2017-10-19 en origin url 1.0.0 key word Comment Rfrence interne link Sponsor Platform/OS dependencies Ended license url spdx http://code.com language 1 language 2 Note ---- We aim on harmonizing the metadata from different origins and thus metadata will be translated to the `CodeMeta v.2 `__ vocabulary if possible. diff --git a/docs/spec-api.rst b/docs/spec-api.rst index 07c84649..4a6b3cc2 100644 --- a/docs/spec-api.rst +++ b/docs/spec-api.rst @@ -1,750 +1,112 @@ API Specification ================= This is `Software Heritage `__'s `SWORD 2.0 `__ Server implementation. **S.W.O.R.D** (**S**\ imple **W**\ eb-Service **O**\ ffering **R**\ epository **D**\ eposit) is an interoperability standard for digital file deposit. This implementation will permit interaction between a client (a repository) and a server (SWH repository) to push deposits of software source code archives with associated metadata. *Note:* * In the following document, we will use the ``archive`` or ``software source code archive`` interchangeably. * The supported archive formats are: * zip: common zip archive (no multi-disk zip files). * tar: tar archive without compression or optionally any of the following compression algorithm gzip (.tar.gz, .tgz), bzip2 (.tar.bz2) , or lzma (.tar.lzma) Collection ---------- SWORD defines a ``collection`` concept. In SWH's case, this collection refers to a group of deposits. A ``deposit`` is some form of software source code archive(s) associated with metadata. By default the client's collection will have the client's name. 
Limitations ----------- * upload limitation of 100Mib * no mediation API overview ------------ API access is over HTTPS. The API is protected through basic authentication. -The API endpoints are rooted at https://deposit.softwareheritage.org/1/. - -Data is sent and received as XML (as specified in the SWORD 2.0 -specification). Endpoints --------- -* ``/1/servicedocument/`` *service document iri* (a.k.a `SD-IRI - <#sd-iri-the-service-document-iri>`__) - - *Goal:* For a client to discover its collection's location - -* ``/1//`` *collection iri* (a.k.a `COL-IRI - <#col-iri-the-collection-iri>`__) - - *Goal:*: create deposit to a collection - -* ``/1///media/`` *update iri* (a.k.a - `EM-IRI <#em-iri-the-atom-edit-media-iri>`__) - - *Goal:*: Add or replace archive(s) to a deposit - -* ``/1///metadata/`` *update iri* (a.k.a `EDIT-IRI - <#edit-iri-the-atom-entry-edit-iri>`__ merged with `SE-IRI - <#se-iri-the-sword-edit-iri>`__) - - *Goal:*: Add or replace metadata (and optionally archive(s) to a deposit - -* ``/1///status/`` *state iri* (a.k.a `STATE-IRI - <#state-iri-the-sword-statement-iri>`__) - - *Goal:*: Display deposit's status in regards to loading - -* ``/1///content/`` *content iri* (a.k.a - `CONT-FILE-IRI <#cont-iri-the-content-iri>`__) - - *Goal:*: Display information on the content's representation in the sword - server - - -Service document request -~~~~~~~~~~~~~~~~~~~~~~~~ - -Endpoint: GET /1/servicedocument/ - -This is the starting endpoint for the client to discover its initial -collection. The answer to this query will describes: - -* the server's abilities -* connected client's collection information - - Also known as: `SD-IRI - The Service Document IRI - <#sd-iri-the-service-document-iri>`__. - -Sample request -^^^^^^^^^^^^^^ - -.. 
code:: shell - - GET https://deposit.softwareheritage.org/1/servicedocument/ HTTP/1.1 - Host: deposit.softwareheritage.org - -The server returns its abilities with the service document in xml format: - -* protocol sword version v2 -* accepted mime types: application/zip (zip), application/x-tar (tar archive - with any of the following optional compression algorithm gzip, bzip2, or - lzma) -* upload max size accepted. Beyond that point, it's expected the client splits - its tarball into multiple ones -* the collection the client can act upon (swh supports only one software - collection per client) -* mediation is not supported - -The current answer for example for the `HAL archive -`__ is: - -.. code:: xml - - - - - 2.0 - 20971520 - - - The Software Heritage (SWH) archive - - SWH Software Archive - application/zip - application/x-tar - Collection Policy - Software Heritage Archive - false - false - Collect, Preserve, Share - http://purl.org/net/sword/package/SimpleZip - https://deposit.softwareheritage.org/1/hal/ - - - - -Deposit creation/update -~~~~~~~~~~~~~~~~~~~~~~~ +The API endpoints are rooted at https://deposit.softwareheritage.org/1/. -The client can send deposit creation/update through a series of deposit -requests to the following endpoints: +Data is sent and received as XML (as specified in the SWORD 2.0 +specification). -* *collection iri* (COL-IRI) to initialize a deposit -* *update iris* (EM-IRI, EDIT-SE-IRI) to complete/finalize a deposit +.. include:: endpoints/service-document.rst -The deposit creation/update can also happens in one request. +.. include:: endpoints/collection.rst -The deposit request can contain: +.. include:: endpoints/update-media.rst -* an archive holding the software source code (binary upload) -* an envelop with metadata describing information regarding a deposit (atom - entry deposit) -* or both (multipart deposit, exactly one archive and one envelop). +.. 
include:: endpoints/update-metadata.rst -Request Types -^^^^^^^^^^^^^ +.. include:: endpoints/status.rst -Binary deposit -'''''''''''''' +.. include:: endpoints/content.rst -The client can deposit a binary archive, supplying the following -headers: -* Content-Type (text): accepted mimetype -* Content-Length (int): tarball size -* Content-MD5 (text): md5 checksum hex encoded of the tarball -* Content-Disposition (text): attachment; filename=[filename] ; the filename - parameter must be text (ascii) -* Packaging (IRI): http://purl.org/net/sword/package/SimpleZip -* In-Progress (bool): true to specify it's not the last request, false to - specify it's a final request and the server can go on with processing the - request's information (if not provided, this is considered false, so final). - -This is a single zip archive deposit. Almost no metadata is associated -with the archive except for the unique external identifier. - -*Note:* This kind of deposit should be ``partial`` (In-Progress: True) -as almost no metadata can be associated with the uploaded archive. - -API endpoints concerned -''''''''''''''''''''''' - -POST /1// Create a first deposit with one archive PUT /1///media/ -Replace existing archives POST /1///media/ Add new archive - -Sample request -'''''''''''''' - -.. code:: shell - - curl -i -u hal: \ - --data-binary @swh/deposit.zip \ - -H 'In-Progress: false' -H 'Content-MD5: 0faa1ecbf9224b9bf48a7c691b8c2b6f' \ - -H 'Content-Disposition: attachment; filename=[deposit.zip]' \ - -H 'Slug: some-external-id' \ - -H 'Packaging: http://purl.org/net/sword/package/SimpleZIP' \ - -H 'Content-type: application/zip' \ - -XPOST https://deposit.softwareheritage.org/1/hal/ - -Atom entry deposit -^^^^^^^^^^^^^^^^^^ - -The client can deposit an xml body holding metadata information on the -deposit. - -*Note:* This kind of deposit is mostly expected to be ``partial`` -(In-Progress: True) since no archive will be associated to those -metadata. 
- -API endpoints concerned -''''''''''''''''''''''' - -POST /1// Create a first atom deposit entry PUT /1///metadata/ Replace -existing metadata POST /1///metadata/ Add new metadata to deposit - -Sample request -'''''''''''''' - -Sample query: - -.. code:: shell - - curl -i -u hal: --data-binary @atom-entry.xml \ - -H 'In-Progress: false' \ - -H 'Slug: some-external-id' \ - -H 'Content-Type: application/atom+xml;type=entry' \ - -XPOST https://deposit.softwareheritage.org/1/hal/ - - HTTP/1.0 201 Created - Date: Tue, 26 Sep 2017 10:32:35 GMT - Server: WSGIServer/0.2 CPython/3.5.3 - Vary: Accept, Cookie - Allow: GET, POST, PUT, DELETE, HEAD, OPTIONS - Location: /1/hal/10/metadata/ - X-Frame-Options: SAMEORIGIN - Content-Type: application/xml - - - 10 - Sept. 26, 2017, 10:32 a.m. - None - deposited - - - - - - - - - - - http://purl.org/net/sword/package/SimpleZip - - -Sample body: - -.. code:: xml - - - Title - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2005-10-07T17:17:08Z - Contributor - The abstract - - - The abstract - Access Rights - Alternative Title - Date Available - Bibliographic Citation # noqa - Contributor - Description - Has Part - Has Version - Identifier - Is Part Of - Publisher - References - Rights Holder - Source - Title - Type - - - -One request deposit / Multipart deposit -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The one request deposit is a single request containing both the metadata -(as atom entry attachment) and the archive (as payload attachment). -Thus, it is a multipart deposit. - -Client provides: - -* Content-Disposition (text): header of type 'attachment' on the Entry Part - with a name parameter set to 'atom' -* Content-Disposition (text): header of type 'attachment' on the Media Part - with a name parameter set to payload and a filename parameter (the filename - will be expressed in ASCII). 
-* Content-MD5 (text): md5 checksum hex encoded of the tarball -* Packaging (text): http://purl.org/net/sword/package/SimpleZip (packaging - format used on the Media Part) -* In-Progress (bool): true\|false; true means ``partial`` upload and we can - expect other requests in the future, false means the deposit is done. -* add metadata formats or foreign markup to the atom:entry element - -API endpoints concerned -''''''''''''''''''''''' - -POST /1// Create a full deposit (metadata + archive) PUT /1///metadata/ -Replace existing metadata and archive POST /1///metadata/ Add new -metadata and archive to deposit - -Sample request -'''''''''''''' - -Sample query: - -.. code:: shell - - curl -i -u hal: \ - -F "file=@../deposit.json;type=application/zip;filename=payload" \ - -F "atom=@../atom-entry.xml;type=application/atom+xml;charset=UTF-8" \ - -H 'In-Progress: false' \ - -H 'Slug: some-external-id' \ - -XPOST https://deposit.softwareheritage.org/1/hal/ - - HTTP/1.0 201 Created - Date: Tue, 26 Sep 2017 10:11:55 GMT - Server: WSGIServer/0.2 CPython/3.5.3 - Vary: Accept, Cookie - Allow: GET, POST, PUT, DELETE, HEAD, OPTIONS - Location: /1/hal/9/metadata/ - X-Frame-Options: SAMEORIGIN - Content-Type: application/xml - - - 9 - Sept. 26, 2017, 10:11 a.m. - payload - deposited - - - - - - - - - - - http://purl.org/net/sword/package/SimpleZip - - -Sample content: - -.. 
code:: xml - - POST deposit HTTP/1.1 - Host: deposit.softwareheritage.org - Content-Length: [content length] - Content-Type: multipart/related; - boundary="===============1605871705=="; - type="application/atom+xml" - In-Progress: false - MIME-Version: 1.0 - - Media Post - --===============1605871705== - Content-Type: application/atom+xml; charset="utf-8" - Content-Disposition: attachment; name="atom" - MIME-Version: 1.0 - - - - Title - hal-or-other-archive-id - 2005-10-07T17:17:08Z - Contributor - - - The abstract - Access Rights - Alternative Title - Date Available - Bibliographic Citation # noqa - Contributor - Description - Has Part - Has Version - Identifier - Is Part Of - Publisher - References - Rights Holder - Source - Title - Type - - --===============1605871705== - Content-Type: application/zip - Content-Disposition: attachment; name=payload; filename=[filename] - Packaging: http://purl.org/net/sword/package/SimpleZip - Content-MD5: [md5-digest] - MIME-Version: 1.0 - - [...binary package data...] - --===============1605871705==-- - -Deposit Creation - server point of view ---------------------------------------- - -The server receives the request(s) and does minimal checking on the -input prior to any saving operations. 
- -Validation of the header and body request -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Any kind of errors can happen, here is the list depending on the -situation: +Possible errors: +---------------- * common errors: * 401 (unauthenticated) if a client does not provide credential or provide wrong ones * 403 (forbidden) if a client tries access to a collection it does not own * 404 (not found) if a client tries access to an unknown collection * 404 (not found) if a client tries access to an unknown deposit * 415 (unsupported media type) if a wrong media type is provided to the endpoint * archive/binary deposit: * 403 (forbidden) if the length of the archive exceeds the max size configured * 412 (precondition failed) if the length or hash provided mismatch the reality of the archive. * 415 (unsupported media type) if a wrong media type is provided * multipart deposit: * 412 (precondition failed) if the md5 hash provided mismatch the reality of the archive * 415 (unsupported media type) if a wrong media type is provided * Atom entry deposit: * 400 (bad request) if the request's body is empty (for creation only) -[3\|5\|6.2] Server uploads the content in a temporary location -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Using an objstorage, the server stores the archive in a temporary -location. It's deemed temporary the time the deposit is completed -(status becomes ``deposited``) and the loading finishes. - -The server also persists requests' information in a database. - -[4] Servers answers the client -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If everything went well, the server answers either with a 200, 201 or -204 response (depending on the actual endpoint) - -A ``http 200`` response is returned for GET endpoints. - -A ``http 201 Created`` response is returned for POST endpoints. The body -holds the deposit receipt. The headers holds the EDIT-IRI in the -Location header of the response. 
- -A ``http 204 No Content`` response is returned for PUT, DELETE -endpoints. - -If something went wrong, the server answers with one of the `error -status code and associated message mentioned <#possible%20errors>`__). - -[5] Deposit Update -~~~~~~~~~~~~~~~~~~ - -The client previously deposited a ``partial`` document (through an -archive, metadata, or both). The client wants to update information for -that previous deposit (possibly in multiple steps as well). - -The important thing to note here is that, as long as the deposit is in -status ``partial``, the loading did not start. Thus, the client can -update information (replace or add new archive, new metadata, even -delete) for that same ``partial`` deposit. - -When the deposit status changes to ``deposited``, the client can no -longer change the deposit's information (a 403 will be returned in that -case). - -Then aggregation of all those deposit's information will later be used -for the actual loading. - -Providing the collection name, and the identifier of the previous -deposit id received from the deposit receipt, the client executes a POST -or PUT request on the *update iris*. - -After validation of the body request, the server: - -- uploads such content in a temporary location - -- answers the client an ``http 204 (No content)``. In the Location header of - the response lies an iri to permit further update. - -- Asynchronously, the server will inject the archive uploaded and the - associated metadata. An operation status endpoint *state iri* permits the - client to query the loading operation status. 
- -Possible update endpoints -^^^^^^^^^^^^^^^^^^^^^^^^^ - -PUT /1///media/ Replace existing archives for the deposit POST -/1///media/ Add new archives to the deposit PUT /1///metadata/ Replace -existing metadata (and possible archives) POST /1///metadata/ Add new -metadata - -[6] Deposit Removal -~~~~~~~~~~~~~~~~~~~ - -As long as the deposit's status remains ``partial``, it's possible to -remove the deposit entirely or remove only the deposit's archive(s). - -If the deposit has been removed, further querying that deposit will -return a *404* response. - -If the deposit's archive(s) has been removed, we can still ensue other -query to update that deposit. - -Operation Status -~~~~~~~~~~~~~~~~ - -Providing a collection name and a deposit id, the client asks the -operation status of a prior deposit. - -URL: GET /1///status/ - -This returns: - -* *201* response with the actual status -* *404* if the deposit does not exist (or no longer does) - - Possible errors ----------------- - -sword:ErrorContent -~~~~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/ErrorContent`` - -The supplied format is not the same as that identified in the Packaging -header and/or that supported by the server Associated HTTP - -Associated HTTP status: *415 (Unsupported Media Type)* - -sword:ErrorChecksumMismatch -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/ErrorChecksumMismatch`` - -Checksum sent does not match the calculated checksum. - -Associated HTTP status: *412 Precondition Failed* - -sword:ErrorBadRequest -~~~~~~~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/ErrorBadRequest`` - -Some parameters sent with the POST/PUT were not understood. - -Associated HTTP status: *400 Bad Request* - -sword:MediationNotAllowed -~~~~~~~~~~~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/MediationNotAllowed`` - -Used where a client has attempted a mediated deposit, but this is not -supported by the server. 
- -Associated HTTP status: *412 Precondition Failed* - -sword:MethodNotAllowed -~~~~~~~~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/MethodNotAllowed`` - -Used when the client has attempted one of the HTTP update verbs (POST, -PUT, DELETE) but the server has decided not to respond to such requests -on the specified resource at that time. - -Associated HTTP Status: *405 Method Not Allowed* - -sword:MaxUploadSizeExceeded -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/MaxUploadSizeExceeded`` - -Used when the client has attempted to supply to the server a file which -exceeds the server's maximum upload size limit - -Associated HTTP Status: *413 (Request Entity Too Large)* - -sword:Unauthorized -~~~~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/ErrorUnauthorized`` - -The access to the api is through authentication. - -Associated HTTP status: *401* - -sword:Forbidden -~~~~~~~~~~~~~~~ - -IRI: ``http://purl.org/net/sword/error/ErrorForbidden`` - -The action is forbidden (access to another collection for example). - -Associated HTTP status: *403* - -Nomenclature ------------- - -SWORD uses IRI notion, Internationalized Resource Identifier. In this -chapter, we will describe SWH's IRIs. - -SD-IRI - The Service Document IRI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The Service Document IRI. This is the IRI from which the client can -discover its collection IRI. - -HTTP verbs supported: *GET* - -Col-IRI - The Collection IRI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The software collection associated to one user. - -The SWORD Collection IRI is the IRI to which the initial deposit will -take place, and which is listed in the Service Document. - -Following our previous example, this is: -https://deposit.softwareheritage.org/1/hal/. - -HTTP verbs supported: *POST* - -Cont-IRI - The Content IRI -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is the endpoint which permits the client to retrieve -representations of the object as it resides in the SWORD server. 
- -This will display information about the content and its associated -metadata. - -HTTP verbs supported: *GET* - -*Note:* We also refer to it as *Cont-File-IRI*. - -EM-IRI - The Atom Edit Media IRI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is the endpoint to upload other related archives for the same -deposit. - -It is used to change a ``partial`` deposit in regards of archives, in -particular: - -* replace existing archives with new ones -* add new archives -* delete archives from a deposit - -Example use case: A first archive to put exceeds the deposit's limit -size. The client can thus split the archives in multiple ones. Post a -first ``partial`` archive to the Col-IRI (with In-Progress: - -True). Then, in order to complete the deposit, POST the other remaining -archives to the EM-IRI (the last one with the In-Progress header to -False). - -HTTP verbs supported: *POST*, *PUT*, *DELETE* - -Edit-IRI - The Atom Entry Edit IRI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is the endpoint to change a ``partial`` deposit in regards of -metadata. In particular: - -* replace existing metadata (and archives) with new ones -* add new metadata (and archives) -* delete deposit - -HTTP verbs supported: *POST*, *PUT*, *DELETE* - -*Note:* We also refer to it as *Edit-SE-IRI*. - -SE-IRI - The SWORD Edit IRI -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The sword specification permits to merge this with EDIT-IRI, so we did. - -*Note:* We also refer to it as *Edit-SE-IRI*. - -State-IRI - The SWORD Statement IRI -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This is the IRI which can be used to retrieve a description of the -object from the sword server, including the structure of the object and -its state. This will be used as the operation status endpoint. 
-HTTP verbs supported: *GET* Sources ------- * `SWORD v2 specification `__ * `arxiv documentation `__ * `Dataverse example `__ * `SWORD used on HAL `__ * `xml examples for CCSD `__ diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO index 1346caef..16421bd2 100644 --- a/swh.deposit.egg-info/PKG-INFO +++ b/swh.deposit.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.deposit -Version: 0.0.47 +Version: 0.0.48 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.deposit.egg-info/SOURCES.txt b/swh.deposit.egg-info/SOURCES.txt index 637d96ae..fe5ecaff 100644 --- a/swh.deposit.egg-info/SOURCES.txt +++ b/swh.deposit.egg-info/SOURCES.txt @@ -1,146 +1,152 @@ .gitignore AUTHORS LICENSE MANIFEST.in Makefile Makefile.local README.md requirements-swh.txt requirements.txt setup.py version.txt bin/Makefile bin/content.sh bin/create_deposit.sh bin/create_deposit_atom.sh bin/create_deposit_with_metadata.sh bin/default-setup bin/download-deposit-archive.sh bin/home.sh bin/replace-deposit-archive.sh bin/service-document.sh bin/status.sh bin/swh-deposit bin/update-deposit-with-another-archive.sh bin/update-status.sh debian/changelog debian/compat debian/control debian/copyright debian/rules debian/source/format docs/.gitignore docs/Makefile docs/blueprint.rst docs/conf.py docs/dev-info.rst docs/getting-started.rst docs/index.rst docs/metadata.rst docs/spec-api.rst docs/spec-loading.rst docs/sys-info.rst docs/_static/.placeholder docs/_templates/.placeholder +docs/endpoints/collection.rst +docs/endpoints/content.rst +docs/endpoints/service-document.rst +docs/endpoints/status.rst +docs/endpoints/update-media.rst +docs/endpoints/update-metadata.rst docs/images/deposit-create-chart.png docs/images/deposit-delete-chart.png 
docs/images/deposit-update-chart.png resources/deposit/server.yml swh/__init__.py swh/manage.py swh.deposit.egg-info/PKG-INFO swh.deposit.egg-info/SOURCES.txt swh.deposit.egg-info/dependency_links.txt swh.deposit.egg-info/requires.txt swh.deposit.egg-info/top_level.txt swh/deposit/__init__.py swh/deposit/apps.py swh/deposit/auth.py swh/deposit/config.py swh/deposit/create_user.py swh/deposit/errors.py swh/deposit/models.py swh/deposit/parsers.py swh/deposit/signals.py swh/deposit/urls.py swh/deposit/wsgi.py swh/deposit/api/__init__.py swh/deposit/api/common.py swh/deposit/api/deposit.py swh/deposit/api/deposit_content.py swh/deposit/api/deposit_status.py swh/deposit/api/deposit_update.py swh/deposit/api/service_document.py swh/deposit/api/urls.py swh/deposit/api/private/__init__.py swh/deposit/api/private/deposit_check.py swh/deposit/api/private/deposit_read.py swh/deposit/api/private/deposit_update_status.py swh/deposit/api/private/urls.py swh/deposit/client/__init__.py swh/deposit/client/cli.py swh/deposit/fixtures/__init__.py swh/deposit/fixtures/deposit_data.yaml swh/deposit/loader/__init__.py swh/deposit/loader/checker.py swh/deposit/loader/loader.py swh/deposit/loader/scheduler.py swh/deposit/loader/tasks.py swh/deposit/migrations/0001_initial.py swh/deposit/migrations/0002_depositrequest_archive.py swh/deposit/migrations/0003_temporaryarchive.py swh/deposit/migrations/0004_delete_temporaryarchive.py swh/deposit/migrations/0005_auto_20171019_1436.py swh/deposit/migrations/0006_depositclient_url.py swh/deposit/migrations/0007_auto_20171129_1609.py swh/deposit/migrations/0008_auto_20171130_1513.py swh/deposit/migrations/0009_deposit_parent.py swh/deposit/migrations/0010_auto_20180110_0953.py swh/deposit/migrations/0011_auto_20180115_1510.py swh/deposit/migrations/__init__.py swh/deposit/settings/__init__.py swh/deposit/settings/common.py swh/deposit/settings/development.py swh/deposit/settings/production.py swh/deposit/settings/testing.py 
swh/deposit/static/robots.txt swh/deposit/static/css/bootstrap-responsive.min.css swh/deposit/static/css/style.css swh/deposit/static/img/arrow-up-small.png swh/deposit/static/img/swh-logo-deposit.png swh/deposit/static/img/swh-logo-deposit.svg swh/deposit/static/img/icons/swh-logo-32x32.png swh/deposit/static/img/icons/swh-logo-deposit-180x180.png swh/deposit/static/img/icons/swh-logo-deposit-192x192.png swh/deposit/static/img/icons/swh-logo-deposit-270x270.png swh/deposit/templates/__init__.py swh/deposit/templates/homepage.html swh/deposit/templates/layout.html swh/deposit/templates/deposit/__init__.py swh/deposit/templates/deposit/content.xml swh/deposit/templates/deposit/deposit_receipt.xml swh/deposit/templates/deposit/error.xml swh/deposit/templates/deposit/service_document.xml swh/deposit/templates/deposit/status.xml swh/deposit/templates/rest_framework/api.html swh/deposit/tests/__init__.py swh/deposit/tests/common.py swh/deposit/tests/api/__init__.py swh/deposit/tests/api/test_common.py swh/deposit/tests/api/test_deposit.py swh/deposit/tests/api/test_deposit_atom.py swh/deposit/tests/api/test_deposit_binary.py swh/deposit/tests/api/test_deposit_check.py swh/deposit/tests/api/test_deposit_delete.py swh/deposit/tests/api/test_deposit_multipart.py swh/deposit/tests/api/test_deposit_read_archive.py swh/deposit/tests/api/test_deposit_read_metadata.py swh/deposit/tests/api/test_deposit_status.py swh/deposit/tests/api/test_deposit_update.py swh/deposit/tests/api/test_deposit_update_status.py swh/deposit/tests/api/test_service_document.py swh/deposit/tests/loader/__init__.py swh/deposit/tests/loader/common.py swh/deposit/tests/loader/test_checker.py swh/deposit/tests/loader/test_client.py swh/deposit/tests/loader/test_loader.py \ No newline at end of file diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py index 02fd80e0..6eb81822 100644 --- a/swh/deposit/api/private/deposit_read.py +++ 
b/swh/deposit/api/private/deposit_read.py @@ -1,239 +1,239 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import os import shutil import tempfile from contextlib import contextmanager from django.http import FileResponse from rest_framework import status from swh.core import tarball from swh.model import identifiers from ...config import SWH_PERSON from ..common import SWHGetDepositAPI, SWHPrivateAPIView from ...models import Deposit, DepositRequest @contextmanager def aggregate_tarballs(extraction_dir, archive_paths): """Aggregate multiple tarballs into one and returns this new archive's path. Args: extraction_dir (path): Path to use for the tarballs computation archive_paths ([str]): Deposit's archive paths Returns: Tuple (directory to clean up, archive path (aggregated or not)) """ if len(archive_paths) > 1: # need to rebuild one archive # from multiple ones os.makedirs(extraction_dir, 0o755, exist_ok=True) dir_path = tempfile.mkdtemp(prefix='swh.deposit-', dir=extraction_dir) # root folder to build an aggregated tarball aggregated_tarball_rootdir = os.path.join(dir_path, 'aggregate') os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True) # uncompress in a temporary location all archives for archive_path in archive_paths: tarball.uncompress(archive_path, aggregated_tarball_rootdir) # Aggregate into one big tarball the multiple smaller ones temp_tarpath = tarball.compress( aggregated_tarball_rootdir + '.zip', nature='zip', dirpath_or_files=aggregated_tarball_rootdir) # can already clean up temporary directory shutil.rmtree(aggregated_tarball_rootdir) try: yield temp_tarpath finally: shutil.rmtree(dir_path) else: # only 1 archive, no need to do fancy actions (and no cleanup step) yield archive_paths[0] class 
SWHDepositReadArchives(SWHGetDepositAPI, SWHPrivateAPIView): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ ADDITIONAL_CONFIG = { 'extraction_dir': ('str', '/tmp/swh-deposit/archive/'), } def __init__(self): super().__init__() self.extraction_dir = self.config['extraction_dir'] if not os.path.exists(self.extraction_dir): os.makedirs(self.extraction_dir) def retrieve_archives(self, deposit_id): """Given a deposit identifier, returns its associated archives' path. Yields: path to deposited archives """ deposit = Deposit.objects.get(pk=deposit_id) deposit_requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['archive']).order_by('id') for deposit_request in deposit_requests: yield deposit_request.archive.path def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ archive_paths = list(self.retrieve_archives(deposit_id)) with aggregate_tarballs(self.extraction_dir, archive_paths) as path: return FileResponse(open(path, 'rb'), status=status.HTTP_200_OK, content_type='application/octet-stream') class SWHDepositReadMetadata(SWHGetDepositAPI, SWHPrivateAPIView): """Class in charge of aggregating metadata on a deposit. 
""" ADDITIONAL_CONFIG = { 'provider': ('dict', { # 'provider_name': '', # those are not set since read from the # 'provider_url': '', # deposit's client 'provider_type': 'deposit_client', 'metadata': {} }), 'tool': ('dict', { 'name': 'swh-deposit', 'version': '0.0.1', 'configuration': { 'sword_version': '2' } }) } def __init__(self): super().__init__() self.provider = self.config['provider'] self.tool = self.config['tool'] def _aggregate_metadata(self, deposit, metadata_requests): """Retrieve and aggregates metadata information. """ metadata = {} for req in metadata_requests: metadata.update(req.metadata) return metadata def _retrieve_url(self, deposit, metadata): client_domain = deposit.client.domain for field in metadata: if 'url' in field: if client_domain in metadata[field]: return metadata[field] def aggregate(self, deposit, requests): """Aggregate multiple data on deposit into one unified data dictionary. Args: deposit (Deposit): Deposit concerned by the data aggregation. requests ([DepositRequest]): List of associated requests which need aggregation. Returns: Dictionary of data representing the deposit to inject in swh. 
""" data = {} # Retrieve tarballs/metadata information metadata = self._aggregate_metadata(deposit, requests) # create origin_url from metadata only after deposit_check validates it origin_url = self._retrieve_url(deposit, metadata) # Read information metadata data['origin'] = { 'type': 'deposit', 'url': origin_url } # revision - fullname = deposit.client.get_full_name() + fullname = deposit.client.username author_committer = SWH_PERSON # metadata provider self.provider['provider_name'] = deposit.client.last_name self.provider['provider_url'] = deposit.client.provider_url revision_type = 'tar' revision_msg = '%s: Deposit %s in collection %s' % ( fullname, deposit.id, deposit.collection.name) complete_date = identifiers.normalize_timestamp(deposit.complete_date) data['revision'] = { 'synthetic': True, 'date': complete_date, 'committer_date': complete_date, 'author': author_committer, 'committer': author_committer, 'type': revision_type, 'message': revision_msg, 'metadata': metadata, } if deposit.parent: swh_persistent_id = deposit.parent.swh_id persistent_identifier = identifiers.parse_persistent_identifier( swh_persistent_id) parent_revision = persistent_identifier['object_id'] data['revision']['parents'] = [parent_revision] data['occurrence'] = { 'branch': 'master' } data['origin_metadata'] = { 'provider': self.provider, 'tool': self.tool, 'metadata': metadata } return data def process_get(self, req, collection_name, deposit_id): deposit = Deposit.objects.get(pk=deposit_id) requests = DepositRequest.objects.filter( deposit=deposit, type=self.deposit_request_types['metadata']) data = self.aggregate(deposit, requests) d = {} if data: d = json.dumps(data) return status.HTTP_200_OK, d, 'application/json' diff --git a/swh/deposit/client/cli.py b/swh/deposit/client/cli.py index ea7512b0..ee2ad582 100755 --- a/swh/deposit/client/cli.py +++ b/swh/deposit/client/cli.py @@ -1,296 +1,296 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the 
top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Script to demonstrate software deposit scenario to https://deposit.sofwareheritage.org. Use: python3 -m swh.deposit.client.cli --help Documentation: https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html # noqa """ import os import click import logging import uuid from . import PublicApiDepositClient class InputError(ValueError): """Input script error """ pass def generate_slug(prefix='swh-sample'): """Generate a slug (sample purposes). """ return '%s-%s' % (prefix, uuid.uuid4()) def parse_cli_options(username, password, archive, metadata, archive_deposit, metadata_deposit, collection, slug, partial, deposit_id, replace, url, status): """Parse the cli options and make sure the combination is acceptable*. If not, an InputError exception is raised explaining the issue. By acceptable, we mean: - A multipart deposit (create or update) needs both an existing software archive and an existing metadata file - A binary deposit (create/update) needs an existing software archive - A metadata deposit (create/update) needs an existing metadata file - A deposit update needs a deposit_id to be provided This won't prevent all failure cases though. The remaining errors are already dealt with the underlying api client. 
Raises: InputError explaining the issue Returns: dict with the following keys: 'archive': the software archive to deposit 'username': username 'password': associated password 'metadata': the metadata file to deposit 'collection': the username's associated client 'slug': the slug or external id identifying the deposit to make 'partial': if the deposit is partial or not 'client': instantiated class 'url': deposit's server main entry point 'deposit_type': deposit's type (binary, multipart, metadata) 'deposit_id': optional deposit identifier """ if status and not deposit_id: raise InputError("Deposit id must be provided for status check") if status and deposit_id: # status is higher priority over deposit archive_deposit = False metadata_deposit = False archive = None metadata = None if archive_deposit and metadata_deposit: # too many flags use, remove redundant ones (-> multipart deposit) archive_deposit = False metadata_deposit = False if archive and not os.path.exists(archive): raise InputError('Software Archive %s must exist!' % archive) if archive and not metadata: metadata = '%s.metadata.xml' % archive if metadata_deposit: archive = None if archive_deposit: metadata = None if metadata_deposit and not metadata: raise InputError( "Metadata deposit filepath must be provided for metadata deposit") if metadata and not os.path.exists(metadata): raise InputError('Software Archive metadata %s must exist!' % metadata) if not status and not archive and not metadata: raise InputError( 'Please provide an actionable command. 
See --help for more ' 'information.') if replace and not deposit_id: raise InputError( 'To update an existing deposit, you must provide its id') client = PublicApiDepositClient({ 'url': url, 'auth': { 'username': username, 'password': password }, }) if not collection: # retrieve user's collection sd_content = client.service_document() if 'error' in sd_content: raise InputError('Service document retrieval: %s' % ( sd_content['error'], )) collection = sd_content['collection'] if not slug: # generate slug slug = generate_slug() return { 'archive': archive, 'username': username, 'password': password, 'metadata': metadata, 'collection': collection, 'slug': slug, 'partial': partial, 'client': client, 'url': url, 'deposit_id': deposit_id, 'replace': replace, } def deposit_status(config, dry_run, log): log.debug('Status deposit') client = config['client'] collection = config['collection'] deposit_id = config['deposit_id'] if not dry_run: r = client.deposit_status(collection, deposit_id, log) return r return {} def deposit_create(config, dry_run, log): """Delegate the actual deposit to the deposit client. """ log.debug('Create deposit') client = config['client'] collection = config['collection'] archive_path = config['archive'] metadata_path = config['metadata'] slug = config['slug'] in_progress = config['partial'] if not dry_run: r = client.deposit_create(collection, slug, archive_path, metadata_path, in_progress, log) return r return {} def deposit_update(config, dry_run, log): """Delegate the actual deposit to the deposit client. 
""" log.debug('Update deposit') client = config['client'] collection = config['collection'] deposit_id = config['deposit_id'] archive_path = config['archive'] metadata_path = config['metadata'] slug = config['slug'] in_progress = config['partial'] replace = config['replace'] if not dry_run: r = client.deposit_update(collection, deposit_id, slug, archive_path, metadata_path, in_progress, replace, log) return r return {} @click.command() @click.option('--username', required=1, help="(Mandatory) User's name") @click.option('--password', required=1, help="(Mandatory) User's associated password") @click.option('--archive', help='(Optional) Software archive to deposit') @click.option('--metadata', help="(Optional) Path to xml metadata file. If not provided, this will use a file named .metadata.xml") # noqa @click.option('--archive-deposit/--no-archive-deposit', default=False, help='(Optional) Software archive only deposit') @click.option('--metadata-deposit/--no-metadata-deposit', default=False, help='(Optional) Metadata only deposit') @click.option('--collection', help="(Optional) User's collection. If not provided, this will be fetched.") # noqa @click.option('--slug', help="""(Optional) External system information identifier. If not provided, it will be generated""") # noqa @click.option('--partial/--no-partial', default=False, help='(Optional) The deposit will be partial, other deposits will have to take place to finalize it.') # noqa @click.option('--deposit-id', default=None, help='(Optional) Update an existing partial deposit with its identifier') # noqa @click.option('--replace/--no-replace', default=False, help='(Optional) Update by replacing existing metadata to a deposit') # noqa -@click.option('--url', default='http://deposit.softwareheritage.org/1', +@click.option('--url', default='https://deposit.softwareheritage.org/1', help="(Optional) Deposit server api endpoint. 
By default, https://deposit.softwareheritage.org/1") # noqa @click.option('--status/--no-status', default=False, help="(Optional) Deposit's status") @click.option('--dry-run/--no-dry-run', default=False, help='(Optional) No-op deposit') @click.option('--verbose/--no-verbose', default=False, help='Verbose mode') def main(username, password, archive=None, metadata=None, archive_deposit=False, metadata_deposit=False, collection=None, slug=None, partial=False, deposit_id=None, replace=False, status=False, url='https://deposit.softwareheritage.org/1', dry_run=True, verbose=False): """Software Heritage Deposit client - Create (or update partial) deposit through the command line. More documentation can be found at https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html. """ log = logging.getLogger('swh-deposit') log.addHandler(logging.StreamHandler()) _loglevel = logging.DEBUG if verbose else logging.INFO log.setLevel(_loglevel) if dry_run: log.info("**DRY RUN**") config = {} try: log.debug('Parsing cli options') config = parse_cli_options( username, password, archive, metadata, archive_deposit, metadata_deposit, collection, slug, partial, deposit_id, replace, url, status) except InputError as e: msg = 'Problem during parsing options: %s' % e r = { 'error': msg, } log.info(r) return 1 if verbose: log.info("Parsed configuration: %s" % ( config, )) deposit_id = config['deposit_id'] if status and deposit_id: r = deposit_status(config, dry_run, log) elif not status and deposit_id: r = deposit_update(config, dry_run, log) elif not status and not deposit_id: r = deposit_create(config, dry_run, log) log.info(r) if __name__ == '__main__': main() diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py index e46682c4..068f34b8 100644 --- a/swh/deposit/tests/api/test_deposit_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_read_metadata.py @@ -1,215 +1,215 @@ # Copyright (C) 2017-2018 The 
class DepositReadMetadataTest(APITestCase, WithAuthTestCase, BasicTestCase,
                              CommonCreationRoutine):
    """Deposit access to read metadata information on deposit.

    Exercises the private "read metadata" endpoint
    (PRIVATE_GET_DEPOSIT_METADATA) for existing deposits, deposits with
    a parent, unknown deposits and unknown collections.

    """
    def _expected_metadata(self, deposit_id, parents=None):
        """Build the metadata document expected for `deposit_id`.

        Args:
            deposit_id: identifier of the deposit being read; it is
                interpolated in the synthetic revision message.
            parents: optional list of parent revision ids. When given,
                it is set as the revision's 'parents' entry; otherwise
                the revision has no 'parents' key at all.

        Returns:
            dict comparable to the decoded json answer of the endpoint

        """
        external_id = 'some-external-id'
        provider_url = 'https://hal-test.archives-ouvertes.fr/'
        atom_metadata = {
            '{http://www.w3.org/2005/Atom}external_identifier': external_id,
            '{http://www.w3.org/2005/Atom}url': provider_url + external_id,
        }

        revision = {
            'synthetic': True,
            'committer_date': None,
            'message': 'hal: Deposit %s in collection hal' % deposit_id,
            'author': SWH_PERSON,
            'committer': SWH_PERSON,
            'date': None,
            # copy so the two 'metadata' entries never alias each other
            'metadata': dict(atom_metadata),
            'type': 'tar',
        }
        if parents is not None:
            revision['parents'] = parents

        return {
            'origin': {
                'url': provider_url + external_id,
                'type': 'deposit',
            },
            'origin_metadata': {
                'metadata': atom_metadata,
                'provider': {
                    'provider_name': 'hal',
                    'provider_type': 'deposit_client',
                    'provider_url': provider_url,
                    'metadata': {},
                },
                'tool': {
                    'tool_name': 'swh-deposit',
                    'tool_version': '0.0.1',
                    'tool_configuration': {
                        'sword_version': '2',
                    },
                },
            },
            'revision': revision,
            'occurrence': {
                'branch': 'master',
            },
        }

    def _get_metadata(self, deposit_id):
        """GET the private metadata of `deposit_id` in self.collection.

        Checks the answer is a 200 with a json content type and returns
        the decoded json body.

        """
        url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
                      args=[self.collection.name, deposit_id])

        response = self.client.get(url)

        self.assertEqual(response.status_code, status.HTTP_200_OK)
        # Header access through the response's public mapping interface
        # rather than the private `_headers` attribute.
        self.assertEqual(response['Content-Type'], 'application/json')
        return json.loads(response.content.decode('utf-8'))

    @istest
    def read_metadata(self):
        """Private metadata read api to existing deposit should return metadata

        """
        deposit_id = self.create_deposit_partial()

        data = self._get_metadata(deposit_id)

        self.assertEqual(data, self._expected_metadata(deposit_id))

    @istest
    def read_metadata_revision_with_parent(self):
        """Private read metadata to a deposit (with parent) returns metadata

        """
        swh_id = 'da78a9d4cf1d5d29873693fd496142e3a18c20fa'
        swh_persistent_id = 'swh:1:rev:%s' % swh_id
        deposit_id1 = self.create_deposit_with_status(
            status=DEPOSIT_STATUS_LOAD_SUCCESS,
            external_id='some-external-id',
            swh_id=swh_persistent_id)

        deposit_parent = Deposit.objects.get(pk=deposit_id1)
        self.assertEqual(deposit_parent.swh_id, swh_persistent_id)
        self.assertEqual(deposit_parent.external_id, 'some-external-id')
        self.assertEqual(deposit_parent.status, DEPOSIT_STATUS_LOAD_SUCCESS)

        # Second deposit reusing the same external id: the checks below
        # establish that the first one became its parent.
        deposit_id = self.create_deposit_partial(
            external_id='some-external-id')

        deposit = Deposit.objects.get(pk=deposit_id)
        self.assertEqual(deposit.external_id, 'some-external-id')
        self.assertIsNone(deposit.swh_id)
        self.assertEqual(deposit.parent, deposit_parent)
        self.assertEqual(deposit.status, DEPOSIT_STATUS_PARTIAL)

        data = self._get_metadata(deposit_id)

        self.assertEqual(
            data, self._expected_metadata(deposit_id, parents=[swh_id]))

    @istest
    def access_to_nonexisting_deposit_returns_404_response(self):
        """Read unknown deposit should return a 404 response

        """
        unknown_id = '999'
        url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
                      args=[self.collection.name, unknown_id])

        response = self.client.get(url)
        self.assertEqual(response.status_code,
                         status.HTTP_404_NOT_FOUND)
        self.assertIn('Deposit with id %s does not exist' % unknown_id,
                      response.content.decode('utf-8'))

    @istest
    def access_to_nonexisting_collection_returns_404_response(self):
        """Read unknown collection should return a 404 response

        """
        collection_name = 'non-existing'
        deposit_id = self.create_deposit_partial()
        url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
                      args=[collection_name, deposit_id])

        response = self.client.get(url)
        self.assertEqual(response.status_code,
                         status.HTTP_404_NOT_FOUND)
        self.assertIn('Unknown collection name %s' % collection_name,
                      response.content.decode('utf-8'))
def create_arborescence_zip(root_path, archive_name, filename, content,
                            up_to_size=None):
    """Build a zip archive named archive_name under root_path.

    A fresh temporary directory is created inside root_path; it holds a
    directory `archive_name` containing a single file `filename`, which
    is then compressed with `tarball.compress` in 'zip' format.

    Args:
        root_path: directory under which the temporary directory is
            created (created itself if missing)
        archive_name: name of the directory to archive; the requested
            archive path is that directory's path suffixed with '.zip'
        filename: name of the single file stored in the archive
        content: bytes written at the start of that file
        up_to_size: when truthy, the file is padded with batches of
            128 ASCII '0' bytes until at least that many bytes were
            written. Padding is whole batches only, so the file may
            exceed up_to_size by up to 127 bytes.

    Returns:
        dict with the keys:
        - dir: the temporary directory holding the archive
        - name: archive_name, as provided
        - data: the archive's raw bytes
        - path: archive path, as returned by tarball.compress
        - sha1sum: hex digest of the archive's sha1
        - md5sum: hex digest of the archive's md5
        - length: archive's length in bytes

    """
    os.makedirs(root_path, exist_ok=True)
    archive_path_dir = tempfile.mkdtemp(dir=root_path)

    dir_path = os.path.join(archive_path_dir, archive_name)
    os.mkdir(dir_path)

    filepath = os.path.join(dir_path, filename)
    batch_size = 128
    with open(filepath, 'wb') as f:
        f.write(content)
        if up_to_size:  # pad the file until it reaches the given size
            written = len(content)
            padding = b'0' * batch_size
            while written < up_to_size:
                f.write(padding)
                written += batch_size

    zip_path = tarball.compress(dir_path + '.zip', 'zip', dir_path)

    # Read the archive once: the bytes are returned to the caller anyway,
    # so hashing the in-memory buffer avoids newline-based chunking of a
    # binary file and repeated bytes concatenation.
    with open(zip_path, 'rb') as f:
        data = f.read()

    return {
        'dir': archive_path_dir,
        'name': archive_name,
        'data': data,
        'path': zip_path,
        'sha1sum': hashlib.sha1(data).hexdigest(),
        'md5sum': hashlib.md5(data).hexdigest(),
        'length': len(data),
    }
""" def setUp(self): """Define the test client and other test variables.""" super().setUp() self.root_path = '/tmp/swh-deposit/test/build-zip/' os.makedirs(self.root_path, exist_ok=True) self.archive = create_arborescence_zip( self.root_path, 'archive1', 'file1', b'some content in file') self.atom_entry = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author https://hal-test.archives-ouvertes.fr """ def tearDown(self): super().tearDown() shutil.rmtree(self.root_path) def create_simple_binary_deposit(self, status_partial=True): response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/zip', data=self.archive['data'], CONTENT_LENGTH=self.archive['length'], HTTP_MD5SUM=self.archive['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive['name'], )) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content[ '{http://www.w3.org/2005/Atom}deposit_id'] return deposit_id def create_complex_binary_deposit(self, status_partial=False): deposit_id = self.create_simple_binary_deposit( status_partial=True) # Add a second archive to the deposit # update its status to DEPOSIT_STATUS_VERIFIED response = self.client.post( reverse(EM_IRI, args=[self.collection.name, deposit_id]), content_type='application/zip', data=self.archive2['data'], CONTENT_LENGTH=self.archive2['length'], HTTP_MD5SUM=self.archive2['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_DISPOSITION='attachment; filename=filename1.zip') # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content[ '{http://www.w3.org/2005/Atom}deposit_id'] return deposit_id def update_binary_deposit(self, deposit_id, status_partial=False): # 
@attr('fs')
class BasicTestCase(TestCase):
    """Mixin providing the minimal data set shared by the tests: the
    deposit request types, one collection and one client (user).

    """
    def setUp(self):
        """Create the request types, the 'hal' collection and its client."""
        super().setUp()
        # do not truncate diffs in assertion failures
        self.maxDiff = None

        # one persisted DepositRequestType per known kind, indexed by name
        request_types = {}
        for type_name in ['archive', 'metadata']:
            request_type = DepositRequestType(name=type_name)
            request_type.save()
            request_types[type_name] = request_type

        # the same value is used as collection name, username, password
        # and client last name
        name = 'hal'

        collection = DepositCollection(name=name)
        collection.save()

        client = DepositClient.objects.create_user(
            username=name,
            password=name,
            provider_url='https://hal-test.archives-ouvertes.fr/',
            domain='archives-ouvertes.fr/')
        client.collections = [collection.id]
        client.last_name = name
        client.save()

        self.collection = collection
        self.user = client
        self.username = name
        self.userpass = name
        self.deposit_request_types = request_types

    def tearDown(self):
        """Remove what the tests uploaded below MEDIA_ROOT."""
        super().tearDown()
        # tests have their own media root folder; nothing to clean when
        # it was never created
        if not os.path.exists(MEDIA_ROOT):
            return
        for entry in os.listdir(MEDIA_ROOT):
            shutil.rmtree(os.path.join(MEDIA_ROOT, entry))
""" def setUp(self): super().setUp() _token = '%s:%s' % (self.username, self.userpass) token = base64.b64encode(_token.encode('utf-8')) authorization = 'Basic %s' % token.decode('utf-8') self.client.credentials(HTTP_AUTHORIZATION=authorization) def tearDown(self): super().tearDown() self.client.credentials() class CommonCreationRoutine(TestCase): """Mixin class to share initialization routine. cf: `class`:test_deposit_update.DepositReplaceExistingDataTest `class`:test_deposit_update.DepositUpdateDepositWithNewDataTest `class`:test_deposit_update.DepositUpdateFailuresTest `class`:test_deposit_delete.DepositDeleteTest """ def setUp(self): super().setUp() self.atom_entry_data0 = b""" some-external-id https://hal-test.archives-ouvertes.fr/some-external-id """ self.atom_entry_data1 = b""" anotherthing https://hal-test.archives-ouvertes.fr/anotherthing """ self.atom_entry_data2 = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author https://hal-test.archives-ouvertes.fr/id """ self.codemeta_entry_data0 = b""" Awesome Compiler https://hal-test.archives-ouvertes.fr/1785io25c695 urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author description key-word 1 """ self.codemeta_entry_data1 = b""" Composing a Web of Audio Applications hal hal-01243065 hal-01243065 https://hal-test.archives-ouvertes.fr/hal-01243065 test DSP programming,Web 2017-05-03T16:08:47+02:00 this is the description 1 phpstorm stable php python C GNU General Public License v3.0 only CeCILL Free Software License Agreement v1.1 HAL hal@ccsd.cnrs.fr Morane Gruenpeter """ def create_invalid_deposit(self, external_id='some-external-id-1'): url = reverse(COL_IRI, args=[self.collection.name]) data = b'some data which is clearly not a zip file' md5sum = hashlib.md5(data).hexdigest() # when response = self.client.post( url, content_type='application/zip', # as zip data=data, # + headers 
def create_deposit_with_status(
        self, status,
        external_id='some-external-id-1',
        swh_id=None):
    """Create a deposit, then force its status (and optionally swh_id).

    The deposit is first created through `create_invalid_deposit` and
    then updated in place in the database.

    Args:
        status: status to set on the deposit
        external_id: external identifier used for the creation
        swh_id: when truthy, stored as the deposit's swh_id

    Returns:
        deposit id

    """
    new_deposit_id = self.create_invalid_deposit(external_id)

    # Deposits with an arbitrary status ('rejected' for example) cannot
    # be obtained through the api in test context: the checks are
    # switched off in the configuration, so all deposits end up with
    # the status deposited. Hence the status is overwritten directly on
    # the stored deposit.
    stored = Deposit.objects.get(pk=new_deposit_id)
    stored.status = status
    if swh_id:
        stored.swh_id = swh_id
    stored.save()

    return new_deposit_id
def add_metadata_to_deposit(self, deposit_id, status_partial=False):
    """Add metadata to deposit.

    Posts `self.codemeta_entry_data1` as an atom entry on the deposit's
    EDIT_SE_IRI, then checks that the deposit has requests and that
    each request of type 'metadata' carries non-empty metadata.

    Args:
        deposit_id: identifier of the deposit to update
        status_partial: value sent as the In-Progress header

    Returns:
        deposit id (unchanged)

    """
    # when
    response = self.client.post(
        reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]),
        content_type='application/atom+xml;type=entry',
        data=self.codemeta_entry_data1,
        HTTP_SLUG='external-id',
        HTTP_IN_PROGRESS=status_partial)
    assert response.status_code == status.HTTP_201_CREATED

    # then
    # get() raises when the deposit does not exist, so no further check
    # on `deposit` itself is needed.
    deposit = Deposit.objects.get(pk=deposit_id)

    # Materialize the queryset once. The emptiness checks must compare
    # values, not identities: `x is not []` / `x is not {}` compare
    # against a brand-new object and are always true.
    deposit_requests = list(DepositRequest.objects.filter(deposit=deposit))
    assert deposit_requests, \
        'deposit %s has no deposit request' % deposit_id

    for dr in deposit_requests:
        if dr.type.name == 'metadata':
            # check the request being iterated, not always the first one
            assert dr.metadata != {}, \
                'metadata request of deposit %s is empty' % deposit_id
    return deposit_id