diff --git a/PKG-INFO b/PKG-INFO index be38b0cf..ca77c3c2 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,94 +1,98 @@ Metadata-Version: 2.1 Name: swh.deposit -Version: 0.13.7 +Version: 0.14.0 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-deposit/ -Description: Software Heritage - Deposit - =========================== - - Simple Web-Service Offering Repository Deposit (S.W.O.R.D) is an interoperability - standard for digital file deposit. - - This repository is both the `SWORD v2`_ Server and a deposit command-line client - implementations. - - This implementation allows interaction between a client (a repository) and a server (SWH - repository) to deposit software source code archives and associated metadata. - - Description - ----------- - - Most of the software source code artifacts present in the SWH Archive are gathered by - the mean of :term:`loader ` workers run by the SWH project from sourve code - origins identified by :term:`lister ` workers. This is a pull mechanism: it's - the responsibility of the SWH project to gather and collect source code artifacts that - way. - - Alternatively, SWH allows its partners to push source code artifacts and metadata - directly into the Archive with a push-based mechanism. By using this possibility - different actors, holding software artifacts or metadata, can preserve their assets - without having to pass through an intermediate collaborative development platform, which - is already harvested by SWH (e.g GitHub, Gitlab, etc.). - - This mechanism is the `deposit`. 
- - The main idea is the deposit is an authenticated access to an API allowing the user to - provide source code artifacts -- with metadata -- to be ingested in the SWH Archive. The - result of that is a :ref:`SWHID ` that can be used to uniquely - and persistently identify that very piece of source code. - - This unique identifier can then be used to `reference the source code - `_ (e.g. in a `scientific paper - `_) and - retrieve it using the :ref:`vault ` feature of the SWH Archive platform. - - The differences between a piece of code uploaded using the deposit rather than simply - asking SWH to archive a repository using the `save code now - `_ feature are: - - - a deposited artifact is provided from one of the SWH partners which is regarded as a - trusted authority, - - a deposited artifact requires metadata properties describing the source code artifact, - - a deposited artifact has a codemeta_ metadata entry attached to it, - - a deposited artifact has the same visibility on the SWH Archive than a collected - repository, - - a deposited artifact can be searched with its provided url property on the SWH - Archive, - - the deposit API uses the `SWORD v2`_ API, thus requires some tooling to send deposits - to SWH. These tools are provided with this repository. - - See the :ref:`deposit-user-manual` page for more details on how to use the deposit client - command line tools to push a deposit in the SWH Archive. - - See the :ref:`deposit-api-specifications` reference pages of the SWORDv2 API implementation - in `swh.deposit` if you want to do upload deposits using HTTP requests. - - Read the :ref:`deposit-metadata` chapter to get more details on what metadata - are supported when doing a deposit. - - See :ref:`swh-deposit-dev-env` if you want to hack the code of the `swh.deposit` module. - - See :ref:`swh-deposit-prod-env` if you want to deploy your own copy of the - `swh.deposit` stack. - - - .. _codemeta: https://codemeta.github.io/ - .. 
_`SWORD v2`: http://swordapp.org/sword-v2/ - Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: server +License-File: LICENSE +License-File: AUTHORS + +Software Heritage - Deposit +=========================== + +Simple Web-Service Offering Repository Deposit (S.W.O.R.D) is an interoperability +standard for digital file deposit. + +This repository is both the `SWORD v2`_ Server and a deposit command-line client +implementations. + +This implementation allows interaction between a client (a repository) and a server (SWH +repository) to deposit software source code archives and associated metadata. + +Description +----------- + +Most of the software source code artifacts present in the SWH Archive are gathered by +means of :term:`loader ` workers run by the SWH project from source code +origins identified by :term:`lister ` workers. This is a pull mechanism: it's +the responsibility of the SWH project to gather and collect source code artifacts that +way. + +Alternatively, SWH allows its partners to push source code artifacts and metadata +directly into the Archive with a push-based mechanism. By using this possibility +different actors, holding software artifacts or metadata, can preserve their assets +without having to pass through an intermediate collaborative development platform, which +is already harvested by SWH (e.g. GitHub, GitLab, etc.). + +This mechanism is the `deposit`. + +The main idea is that the deposit is an authenticated access to an API allowing the user to +provide source code artifacts -- with metadata -- to be ingested in the SWH Archive.
The +result of that is a :ref:`SWHID ` that can be used to uniquely +and persistently identify that very piece of source code. + +This unique identifier can then be used to `reference the source code +`_ (e.g. in a `scientific paper +`_) and +retrieve it using the :ref:`vault ` feature of the SWH Archive platform. + +The differences between a piece of code uploaded using the deposit rather than simply +asking SWH to archive a repository using the `save code now +`_ feature are: + +- a deposited artifact is provided by one of the SWH partners which is regarded as a + trusted authority, +- a deposited artifact requires metadata properties describing the source code artifact, +- a deposited artifact has a codemeta_ metadata entry attached to it, +- a deposited artifact has the same visibility on the SWH Archive as a collected + repository, +- a deposited artifact can be searched with its provided url property on the SWH + Archive, +- the deposit API uses the `SWORD v2`_ API, thus requires some tooling to send deposits + to SWH. These tools are provided with this repository. + +See the :ref:`deposit-user-manual` page for more details on how to use the deposit client +command line tools to push a deposit in the SWH Archive. + +See the :ref:`deposit-api-specifications` reference pages of the SWORDv2 API implementation +in `swh.deposit` if you want to upload deposits using HTTP requests. + +Read the :ref:`deposit-metadata` chapter to get more details on what metadata +are supported when doing a deposit. + +See :ref:`swh-deposit-dev-env` if you want to hack the code of the `swh.deposit` module. + +See :ref:`swh-deposit-prod-env` if you want to deploy your own copy of the +`swh.deposit` stack. + + +.. _codemeta: https://codemeta.github.io/ +..
_`SWORD v2`: http://swordapp.org/sword-v2/ + + diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO index be38b0cf..ca77c3c2 100644 --- a/swh.deposit.egg-info/PKG-INFO +++ b/swh.deposit.egg-info/PKG-INFO @@ -1,94 +1,98 @@ Metadata-Version: 2.1 Name: swh.deposit -Version: 0.13.7 +Version: 0.14.0 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-deposit/ -Description: Software Heritage - Deposit - =========================== - - Simple Web-Service Offering Repository Deposit (S.W.O.R.D) is an interoperability - standard for digital file deposit. - - This repository is both the `SWORD v2`_ Server and a deposit command-line client - implementations. - - This implementation allows interaction between a client (a repository) and a server (SWH - repository) to deposit software source code archives and associated metadata. - - Description - ----------- - - Most of the software source code artifacts present in the SWH Archive are gathered by - the mean of :term:`loader ` workers run by the SWH project from sourve code - origins identified by :term:`lister ` workers. This is a pull mechanism: it's - the responsibility of the SWH project to gather and collect source code artifacts that - way. - - Alternatively, SWH allows its partners to push source code artifacts and metadata - directly into the Archive with a push-based mechanism. 
By using this possibility - different actors, holding software artifacts or metadata, can preserve their assets - without having to pass through an intermediate collaborative development platform, which - is already harvested by SWH (e.g GitHub, Gitlab, etc.). - - This mechanism is the `deposit`. - - The main idea is the deposit is an authenticated access to an API allowing the user to - provide source code artifacts -- with metadata -- to be ingested in the SWH Archive. The - result of that is a :ref:`SWHID ` that can be used to uniquely - and persistently identify that very piece of source code. - - This unique identifier can then be used to `reference the source code - `_ (e.g. in a `scientific paper - `_) and - retrieve it using the :ref:`vault ` feature of the SWH Archive platform. - - The differences between a piece of code uploaded using the deposit rather than simply - asking SWH to archive a repository using the `save code now - `_ feature are: - - - a deposited artifact is provided from one of the SWH partners which is regarded as a - trusted authority, - - a deposited artifact requires metadata properties describing the source code artifact, - - a deposited artifact has a codemeta_ metadata entry attached to it, - - a deposited artifact has the same visibility on the SWH Archive than a collected - repository, - - a deposited artifact can be searched with its provided url property on the SWH - Archive, - - the deposit API uses the `SWORD v2`_ API, thus requires some tooling to send deposits - to SWH. These tools are provided with this repository. - - See the :ref:`deposit-user-manual` page for more details on how to use the deposit client - command line tools to push a deposit in the SWH Archive. - - See the :ref:`deposit-api-specifications` reference pages of the SWORDv2 API implementation - in `swh.deposit` if you want to do upload deposits using HTTP requests. 
- - Read the :ref:`deposit-metadata` chapter to get more details on what metadata - are supported when doing a deposit. - - See :ref:`swh-deposit-dev-env` if you want to hack the code of the `swh.deposit` module. - - See :ref:`swh-deposit-prod-env` if you want to deploy your own copy of the - `swh.deposit` stack. - - - .. _codemeta: https://codemeta.github.io/ - .. _`SWORD v2`: http://swordapp.org/sword-v2/ - Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: server +License-File: LICENSE +License-File: AUTHORS + +Software Heritage - Deposit +=========================== + +Simple Web-Service Offering Repository Deposit (S.W.O.R.D) is an interoperability +standard for digital file deposit. + +This repository is both the `SWORD v2`_ Server and a deposit command-line client +implementations. + +This implementation allows interaction between a client (a repository) and a server (SWH +repository) to deposit software source code archives and associated metadata. + +Description +----------- + +Most of the software source code artifacts present in the SWH Archive are gathered by +means of :term:`loader ` workers run by the SWH project from source code +origins identified by :term:`lister ` workers. This is a pull mechanism: it's +the responsibility of the SWH project to gather and collect source code artifacts that +way. + +Alternatively, SWH allows its partners to push source code artifacts and metadata +directly into the Archive with a push-based mechanism.
By using this possibility +different actors, holding software artifacts or metadata, can preserve their assets +without having to pass through an intermediate collaborative development platform, which +is already harvested by SWH (e.g. GitHub, GitLab, etc.). + +This mechanism is the `deposit`. + +The main idea is that the deposit is an authenticated access to an API allowing the user to +provide source code artifacts -- with metadata -- to be ingested in the SWH Archive. The +result of that is a :ref:`SWHID ` that can be used to uniquely +and persistently identify that very piece of source code. + +This unique identifier can then be used to `reference the source code +`_ (e.g. in a `scientific paper +`_) and +retrieve it using the :ref:`vault ` feature of the SWH Archive platform. + +The differences between a piece of code uploaded using the deposit rather than simply +asking SWH to archive a repository using the `save code now +`_ feature are: + +- a deposited artifact is provided by one of the SWH partners which is regarded as a + trusted authority, +- a deposited artifact requires metadata properties describing the source code artifact, +- a deposited artifact has a codemeta_ metadata entry attached to it, +- a deposited artifact has the same visibility on the SWH Archive as a collected + repository, +- a deposited artifact can be searched with its provided url property on the SWH + Archive, +- the deposit API uses the `SWORD v2`_ API, thus requires some tooling to send deposits + to SWH. These tools are provided with this repository. + +See the :ref:`deposit-user-manual` page for more details on how to use the deposit client +command line tools to push a deposit in the SWH Archive. + +See the :ref:`deposit-api-specifications` reference pages of the SWORDv2 API implementation +in `swh.deposit` if you want to upload deposits using HTTP requests. + +Read the :ref:`deposit-metadata` chapter to get more details on what metadata +are supported when doing a deposit.
+ +See :ref:`swh-deposit-dev-env` if you want to hack the code of the `swh.deposit` module. + +See :ref:`swh-deposit-prod-env` if you want to deploy your own copy of the +`swh.deposit` stack. + + +.. _codemeta: https://codemeta.github.io/ +.. _`SWORD v2`: http://swordapp.org/sword-v2/ + + diff --git a/swh.deposit.egg-info/SOURCES.txt b/swh.deposit.egg-info/SOURCES.txt index a1d5a9e2..3dd9aab5 100644 --- a/swh.deposit.egg-info/SOURCES.txt +++ b/swh.deposit.egg-info/SOURCES.txt @@ -1,277 +1,286 @@ .gitignore .pre-commit-config.yaml AUTHORS CODE_OF_CONDUCT.md CONTRIBUTORS LICENSE MANIFEST.in Makefile Makefile.local README.rst conftest.py mypy.ini pyproject.toml pytest.ini requirements-server.txt requirements-swh-server.txt requirements-swh.txt requirements-test.txt requirements.txt setup.cfg setup.py tox.ini bin/Makefile bin/content.sh bin/create_deposit.sh bin/create_deposit_atom.sh bin/create_deposit_with_metadata.sh bin/default-setup bin/download-deposit-archive.sh bin/home.sh bin/replace-deposit-archive.sh bin/service-document.sh bin/status.sh bin/update-deposit-with-another-archive.sh bin/update-status.sh docs/.gitignore docs/Makefile docs/README.rst docs/cli.rst docs/conf.py docs/index.rst docs/metadata.rst docs/spec-api.rst docs/user-manual.rst docs/_static/.placeholder docs/_templates/.placeholder docs/api/api-documentation.rst docs/api/index.rst docs/api/metadata.rst docs/api/use-cases.rst docs/api/user-manual.rst docs/endpoints/collection.rst docs/endpoints/content.rst docs/endpoints/service-document.rst docs/endpoints/status.rst docs/endpoints/update-media.rst docs/endpoints/update-metadata.rst docs/images/.gitignore docs/images/Makefile docs/images/deposit-authentication-basic.uml docs/images/deposit-authentication-keycloak.uml docs/images/deposit-create-chart.uml docs/images/deposit-delete-chart.uml docs/images/deposit-update-chart.uml docs/images/deposit-workflow-checking.uml docs/images/deposit-workflow-loading.uml 
docs/images/deposit-workflow-reception.uml docs/images/status.uml docs/internals/authentication.rst docs/internals/dev-environment.rst docs/internals/index.rst docs/internals/loading-workflow.rst docs/internals/prod-environment.rst docs/specs/blueprint.rst docs/specs/index.rst docs/specs/metadata_example.xml docs/specs/protocol-reference.rst docs/specs/spec-loading.rst docs/specs/spec-meta-deposit.rst docs/specs/swh.xsd resources/deposit/server.yml swh/__init__.py swh.deposit.egg-info/PKG-INFO swh.deposit.egg-info/SOURCES.txt swh.deposit.egg-info/dependency_links.txt swh.deposit.egg-info/entry_points.txt swh.deposit.egg-info/requires.txt swh.deposit.egg-info/top_level.txt swh/deposit/__init__.py swh/deposit/apps.py swh/deposit/auth.py swh/deposit/client.py swh/deposit/config.py swh/deposit/errors.py swh/deposit/exception.py swh/deposit/gunicorn_config.py swh/deposit/manage.py swh/deposit/models.py swh/deposit/parsers.py swh/deposit/py.typed swh/deposit/urls.py swh/deposit/utils.py swh/deposit/api/__init__.py swh/deposit/api/checks.py swh/deposit/api/collection.py swh/deposit/api/common.py swh/deposit/api/content.py swh/deposit/api/converters.py swh/deposit/api/edit.py swh/deposit/api/edit_media.py swh/deposit/api/service_document.py swh/deposit/api/state.py swh/deposit/api/sword_edit.py swh/deposit/api/urls.py +swh/deposit/api/utils.py swh/deposit/api/private/__init__.py swh/deposit/api/private/deposit_check.py swh/deposit/api/private/deposit_list.py swh/deposit/api/private/deposit_read.py swh/deposit/api/private/deposit_update_status.py swh/deposit/api/private/urls.py swh/deposit/cli/__init__.py swh/deposit/cli/admin.py swh/deposit/cli/client.py swh/deposit/fixtures/__init__.py swh/deposit/fixtures/deposit_data.yaml swh/deposit/loader/__init__.py swh/deposit/loader/checker.py swh/deposit/loader/tasks.py swh/deposit/migrations/0001_initial.py swh/deposit/migrations/0002_depositrequest_archive.py swh/deposit/migrations/0003_temporaryarchive.py 
swh/deposit/migrations/0004_delete_temporaryarchive.py swh/deposit/migrations/0005_auto_20171019_1436.py swh/deposit/migrations/0006_depositclient_url.py swh/deposit/migrations/0007_auto_20171129_1609.py swh/deposit/migrations/0008_auto_20171130_1513.py swh/deposit/migrations/0009_deposit_parent.py swh/deposit/migrations/0010_auto_20180110_0953.py swh/deposit/migrations/0011_auto_20180115_1510.py swh/deposit/migrations/0012_deposit_status_detail.py swh/deposit/migrations/0013_depositrequest_raw_metadata.py swh/deposit/migrations/0014_auto_20180720_1221.py swh/deposit/migrations/0015_depositrequest_typemigration.py swh/deposit/migrations/0016_auto_20190507_1408.py swh/deposit/migrations/0017_auto_20190925_0906.py swh/deposit/migrations/0018_migrate_swhids.py swh/deposit/migrations/0019_auto_20200519_1035.py swh/deposit/migrations/0020_auto_20200929_0855.py swh/deposit/migrations/0021_deposit_origin_url_20201124_1438.py swh/deposit/migrations/__init__.py swh/deposit/settings/__init__.py swh/deposit/settings/common.py swh/deposit/settings/development.py swh/deposit/settings/production.py swh/deposit/settings/testing.py swh/deposit/static/robots.txt swh/deposit/static/css/bootstrap-responsive.min.css swh/deposit/static/css/style.css swh/deposit/static/img/arrow-up-small.png swh/deposit/static/img/swh-logo-deposit.png swh/deposit/static/img/swh-logo-deposit.svg swh/deposit/static/img/icons/swh-logo-32x32.png swh/deposit/static/img/icons/swh-logo-deposit-180x180.png swh/deposit/static/img/icons/swh-logo-deposit-192x192.png swh/deposit/static/img/icons/swh-logo-deposit-270x270.png swh/deposit/templates/__init__.py swh/deposit/templates/api.html swh/deposit/templates/homepage.html swh/deposit/templates/layout.html swh/deposit/templates/deposit/__init__.py +swh/deposit/templates/deposit/collection_list.xml swh/deposit/templates/deposit/content.xml swh/deposit/templates/deposit/deposit_info.xml swh/deposit/templates/deposit/deposit_receipt.xml 
swh/deposit/templates/deposit/error.xml swh/deposit/templates/deposit/service_document.xml swh/deposit/templates/deposit/state.xml swh/deposit/templates/rest_framework/api.html swh/deposit/tests/__init__.py swh/deposit/tests/common.py swh/deposit/tests/conftest.py swh/deposit/tests/test_backend.py +swh/deposit/tests/test_client_module.py swh/deposit/tests/test_common.py swh/deposit/tests/test_gunicorn_config.py swh/deposit/tests/test_init.py swh/deposit/tests/test_utils.py swh/deposit/tests/api/__init__.py swh/deposit/tests/api/conftest.py swh/deposit/tests/api/test_basic_auth.py swh/deposit/tests/api/test_checks.py swh/deposit/tests/api/test_collection.py swh/deposit/tests/api/test_collection_add_to_origin.py +swh/deposit/tests/api/test_collection_list.py swh/deposit/tests/api/test_collection_post_atom.py swh/deposit/tests/api/test_collection_post_binary.py swh/deposit/tests/api/test_collection_post_multipart.py swh/deposit/tests/api/test_collection_reuse_slug.py swh/deposit/tests/api/test_converters.py swh/deposit/tests/api/test_delete.py -swh/deposit/tests/api/test_deposit_list.py swh/deposit/tests/api/test_deposit_private_check.py +swh/deposit/tests/api/test_deposit_private_list.py swh/deposit/tests/api/test_deposit_private_read_archive.py swh/deposit/tests/api/test_deposit_private_read_metadata.py swh/deposit/tests/api/test_deposit_private_update_status.py swh/deposit/tests/api/test_deposit_schedule.py swh/deposit/tests/api/test_deposit_state.py swh/deposit/tests/api/test_deposit_update.py swh/deposit/tests/api/test_deposit_update_atom.py swh/deposit/tests/api/test_deposit_update_binary.py swh/deposit/tests/api/test_exception.py swh/deposit/tests/api/test_get_file.py swh/deposit/tests/api/test_keycloak_auth.py swh/deposit/tests/api/test_parsers.py swh/deposit/tests/api/test_service_document.py swh/deposit/tests/cli/__init__.py swh/deposit/tests/cli/conftest.py swh/deposit/tests/cli/test_admin.py swh/deposit/tests/cli/test_client.py 
swh/deposit/tests/data/archives/single-artifact-package.tar.gz swh/deposit/tests/data/atom/codemeta-sample.xml swh/deposit/tests/data/atom/entry-data-badly-formatted.xml swh/deposit/tests/data/atom/entry-data-deposit-binary.xml swh/deposit/tests/data/atom/entry-data-empty-body.xml swh/deposit/tests/data/atom/entry-data-fail-metadata-functional-checks.xml swh/deposit/tests/data/atom/entry-data-ko.xml swh/deposit/tests/data/atom/entry-data-minimal.xml swh/deposit/tests/data/atom/entry-data-no-origin-url.xml swh/deposit/tests/data/atom/entry-data-parsing-error-prone.xml swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml swh/deposit/tests/data/atom/entry-data-with-both-add-to-origin-and-external-id.xml swh/deposit/tests/data/atom/entry-data-with-both-create-origin-and-add-to-origin.xml swh/deposit/tests/data/atom/entry-data-with-origin-reference.xml swh/deposit/tests/data/atom/entry-data-with-swhid-fail-metadata-functional-checks.xml swh/deposit/tests/data/atom/entry-data-with-swhid.xml swh/deposit/tests/data/atom/entry-data0.xml swh/deposit/tests/data/atom/entry-data1.xml swh/deposit/tests/data/atom/entry-data2.xml swh/deposit/tests/data/atom/entry-data3.xml +swh/deposit/tests/data/atom/entry-list-deposits-page1.xml +swh/deposit/tests/data/atom/entry-list-deposits-page2.xml +swh/deposit/tests/data/atom/entry-list-deposits.xml swh/deposit/tests/data/atom/entry-only-create-origin.xml swh/deposit/tests/data/atom/entry-update-in-place.xml swh/deposit/tests/data/atom/error-cli.xml swh/deposit/tests/data/atom/error-with-decimal.xml swh/deposit/tests/data/atom/error-with-external-identifier-and-create-origin.xml swh/deposit/tests/data/atom/error-with-external-identifier.xml swh/deposit/tests/data/atom/error-with-reference-and-create-origin.xml swh/deposit/tests/data/atom/metadata.xml swh/deposit/tests/data/https_deposit.swh.test/1_servicedocument swh/deposit/tests/data/https_deposit.swh.test/1_test +swh/deposit/tests/data/https_deposit.test.list/1_servicedocument 
+swh/deposit/tests/data/https_deposit.test.list/1_test,page=1,page_size=10 swh/deposit/tests/data/https_deposit.test.metadata/1_servicedocument swh/deposit/tests/data/https_deposit.test.metadata/1_test swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_media swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_metadata swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_status swh/deposit/tests/data/https_deposit.test.metadataonly/1_servicedocument swh/deposit/tests/data/https_deposit.test.metadataonly/1_test swh/deposit/tests/data/https_deposit.test.status/1_servicedocument swh/deposit/tests/data/https_deposit.test.status/1_test_1033_status swh/deposit/tests/data/https_deposit.test.updateswhid/1_servicedocument swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_atom swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status swh/deposit/tests/loader/__init__.py swh/deposit/tests/loader/common.py swh/deposit/tests/loader/conftest.py swh/deposit/tests/loader/test_checker.py swh/deposit/tests/loader/test_client.py swh/deposit/tests/loader/test_tasks.py swh/deposit/tests/loader/data/http_example.org/hello.json swh/deposit/tests/loader/data/http_example.org/hello_you swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_1_check swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_2_check swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_meta swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_raw swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_update swh/deposit/tests/loader/data/https_nowhere.org/1_private_test_1_check swh/deposit/tests/loader/data/https_nowhere.org/1_private_test_1_metadata swh/deposit/tests/loader/data/https_nowhere.org/1_private_test_1_raw 
swh/deposit/tests_migration/__init__.py swh/deposit/tests_migration/test_migrations.py \ No newline at end of file diff --git a/swh/deposit/api/collection.py b/swh/deposit/api/collection.py index 47f3d291..d66ee47d 100644 --- a/swh/deposit/api/collection.py +++ b/swh/deposit/api/collection.py @@ -1,135 +1,176 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Optional, Tuple +from django.shortcuts import render from rest_framework import status +from rest_framework.generics import ListAPIView from ..config import DEPOSIT_STATUS_LOAD_SUCCESS, EDIT_IRI from ..models import Deposit from ..parsers import ( SWHAtomEntryParser, SWHFileUploadTarParser, SWHFileUploadZipParser, SWHMultiPartParser, ) from .common import ( ACCEPT_ARCHIVE_CONTENT_TYPES, APIPost, ParsedRequestHeaders, Receipt, get_collection_by_name, ) +from .utils import DefaultPagination, DepositSerializer -class CollectionAPI(APIPost): +class CollectionAPI(ListAPIView, APIPost): """Deposit request class defining api endpoints for sword deposit. What's known as 'Col-IRI' in the sword specification. - HTTP verbs supported: POST + HTTP verbs supported: GET and POST """ parser_classes = ( SWHMultiPartParser, SWHFileUploadZipParser, SWHFileUploadTarParser, SWHAtomEntryParser, ) + serializer_class = DepositSerializer + pagination_class = DefaultPagination + + def get(self, request, *args, **kwargs): + """List the user's collection if the user has access to said collection.
+ + """ + self.checks(request, kwargs["collection_name"]) + paginated_result = super().get(request, *args, **kwargs) + data = paginated_result.data + # Build pagination link headers + links = [] + for link_name in ["next", "previous"]: + link = data.get(link_name) + if link is None: + continue + links.append(f'<{link}>; rel="{link_name}"') + response = render( + request, + "deposit/collection_list.xml", + context={ + "count": data["count"], + "results": [dict(d) for d in data["results"]], + }, + content_type="application/xml", + status=status.HTTP_200_OK, + ) + # Set the header via the public mapping API, not the private `_headers` dict + response["Link"] = ",".join(links) + return response + + def get_queryset(self): + """List the deposits for the authenticated user (pagination is handled by the + `pagination_class` class attribute). + + """ + return Deposit.objects.filter(client=self.request.user.id).order_by("id") + def process_post( self, req, headers: ParsedRequestHeaders, collection_name: str, deposit: Optional[Deposit] = None, ) -> Tuple[int, str, Receipt]: """Create a first deposit as: - archive deposit (1 zip) - multipart (1 zip + 1 atom entry) - atom entry Args: req (Request): the request holding the information to parse and inject in db collection_name (str): the associated client Returns: An http response (HttpResponse) according to the situation. If everything is ok, a 201 response (created) with a deposit receipt. Raises: - archive deposit: - 400 (bad request) if the request is not providing an external identifier - 403 (forbidden) if the length of the archive exceeds the max size configured - 412 (precondition failed) if the length or hash provided mismatch the reality of the archive.
- 415 (unsupported media type) if a wrong media type is provided - multipart deposit: - 400 (bad request) if the request is not providing an external identifier - 412 (precondition failed) if the potentially md5 hash provided mismatch the reality of the archive - 415 (unsupported media type) if a wrong media type is provided - Atom entry deposit: - 400 (bad request) if the request is not providing an external identifier - 400 (bad request) if the request's body is empty - 415 (unsupported media type) if a wrong media type is provided """ assert deposit is None deposit = self._deposit_create(req, collection_name, external_id=headers.slug) if req.content_type in ACCEPT_ARCHIVE_CONTENT_TYPES: receipt = self._binary_upload(req, headers, collection_name, deposit) elif req.content_type.startswith("multipart/"): receipt = self._multipart_upload(req, headers, collection_name, deposit) else: receipt = self._atom_entry(req, headers, collection_name, deposit) return status.HTTP_201_CREATED, EDIT_IRI, receipt def _deposit_create( self, request, collection_name: str, external_id: Optional[str] ) -> Deposit: collection = get_collection_by_name(collection_name) client = self.get_client(request) deposit_parent: Optional[Deposit] = None if external_id: # TODO: delete this when clients stopped relying on the slug try: # find a deposit parent (same external id, status load to success) deposit_parent = ( Deposit.objects.filter( client=client, external_id=external_id, status=DEPOSIT_STATUS_LOAD_SUCCESS, ) .order_by("-id")[0:1] .get() ) except Deposit.DoesNotExist: # then no parent for that deposit, deposit_parent already None pass return Deposit( collection=collection, external_id=external_id or "", client=client, parent=deposit_parent, ) diff --git a/swh/deposit/api/private/deposit_list.py b/swh/deposit/api/private/deposit_list.py index a5c81c12..402e269c 100644 --- a/swh/deposit/api/private/deposit_list.py +++ b/swh/deposit/api/private/deposit_list.py @@ -1,66 +1,39 @@ -# Copyright
(C) 2018-2020 The Software Heritage developers +# Copyright (C) 2018-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from rest_framework import serializers -from rest_framework.fields import _UnvalidatedField from rest_framework.generics import ListAPIView -from rest_framework.pagination import PageNumberPagination + +from swh.deposit.api.utils import DefaultPagination, DepositSerializer from . import APIPrivateView from ...models import Deposit -from ..converters import convert_status_detail - - -class DefaultPagination(PageNumberPagination): - page_size = 100 - page_size_query_param = "page_size" - - -class StatusDetailField(_UnvalidatedField): - """status_detail field is a dict, we want a simple message instead. - So, we reuse the convert_status_detail from deposit_status - endpoint to that effect. - - """ - - def to_representation(self, value): - return convert_status_detail(value) - - -class DepositSerializer(serializers.ModelSerializer): - status_detail = StatusDetailField() - - class Meta: - model = Deposit - fields = "__all__" class APIList(ListAPIView, APIPrivateView): """Deposit request class to list the deposit's status per page. 
HTTP verbs supported: GET """ serializer_class = DepositSerializer pagination_class = DefaultPagination def get_queryset(self): params = self.request.query_params exclude_like = params.get("exclude") if exclude_like: # sql injection: A priori, nothing to worry about, django does it for # queryset # https://docs.djangoproject.com/en/3.0/topics/security/#sql-injection-protection # noqa - # https://docs.djangoproject.com/en/2.2/topics/security/#sql-injection-protection # noqa deposits = ( Deposit.objects.all() .exclude(external_id__startswith=exclude_like) .order_by("id") ) else: deposits = Deposit.objects.all().order_by("id") return deposits diff --git a/swh/deposit/api/urls.py b/swh/deposit/api/urls.py index 4287baa7..a8b7146b 100644 --- a/swh/deposit/api/urls.py +++ b/swh/deposit/api/urls.py @@ -1,76 +1,76 @@ -# Copyright (C) 2017-2020 The Software Heritage developers +# Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """SWH's deposit api URL Configuration """ from django.conf.urls import url from django.shortcuts import render from ..config import COL_IRI, CONT_FILE_IRI, EDIT_IRI, EM_IRI, SD_IRI, SE_IRI, STATE_IRI from .collection import CollectionAPI from .content import ContentAPI from .edit import EditAPI from .edit_media import EditMediaAPI from .service_document import ServiceDocumentAPI from .state import StateAPI from .sword_edit import SwordEditAPI def api_view(req): return render(req, "api.html") # PUBLIC API urlpatterns = [ # simple view on the api url(r"^$", api_view, name="api"), # SD IRI - Service Document IRI # -> GET url(r"^servicedocument/", ServiceDocumentAPI.as_view(), name=SD_IRI), # Col-IRI - Collection IRI # -> POST url(r"^(?P[^/]+)/$", CollectionAPI.as_view(), name=COL_IRI), # EM IRI - Atom Edit Media IRI (update archive IRI) # -> PUT 
(update-in-place existing archive) # -> POST (add new archive) url( r"^(?P[^/]+)/(?P[^/]+)/media/$", EditMediaAPI.as_view(), name=EM_IRI, ), # Edit IRI - Atom Entry Edit IRI (update metadata IRI) # -> PUT (update in place) # -> DELETE (delete container) url( r"^(?P[^/]+)/(?P[^/]+)/atom/$", EditAPI.as_view(), name=EDIT_IRI, ), # SE IRI - Sword Edit IRI ;; possibly same as Edit IRI # -> POST (add new metadata) url( r"^(?P[^/]+)/(?P[^/]+)/metadata/$", SwordEditAPI.as_view(), name=SE_IRI, ), # State IRI # -> GET url( r"^(?P[^/]+)/(?P[^/]+)/status/$", StateAPI.as_view(), name=STATE_IRI, ), # Cont-IRI # -> GET url( r"^(?P[^/]+)/(?P[^/]+)/content/$", ContentAPI.as_view(), name=CONT_FILE_IRI, ), # specification is not clear about # File-IRI, we assume it's the same as # the Cont-IRI one ] diff --git a/swh/deposit/api/utils.py b/swh/deposit/api/utils.py new file mode 100644 index 00000000..938d44e6 --- /dev/null +++ b/swh/deposit/api/utils.py @@ -0,0 +1,35 @@ +# Copyright (C) 2018-2021 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from rest_framework import serializers +from rest_framework.fields import _UnvalidatedField +from rest_framework.pagination import PageNumberPagination + +from swh.deposit.api.converters import convert_status_detail +from swh.deposit.models import Deposit + + +class DefaultPagination(PageNumberPagination): + page_size = 100 + page_size_query_param = "page_size" + + +class StatusDetailField(_UnvalidatedField): + """status_detail field is a dict, we want a simple message instead. + So, we reuse the convert_status_detail from deposit_status + endpoint to that effect. 
+ + """ + + def to_representation(self, value): + return convert_status_detail(value) + + +class DepositSerializer(serializers.ModelSerializer): + status_detail = StatusDetailField() + + class Meta: + model = Deposit + fields = "__all__" diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py index 1bb37351..42a3500f 100644 --- a/swh/deposit/cli/client.py +++ b/swh/deposit/cli/client.py @@ -1,575 +1,601 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import annotations from contextlib import contextmanager from datetime import datetime, timezone import logging # WARNING: do not import unnecessary things here to keep cli startup time under # control import os import sys from typing import TYPE_CHECKING, Any, Collection, Dict, List, Optional import warnings import click from swh.deposit.cli import deposit logger = logging.getLogger(__name__) if TYPE_CHECKING: from swh.deposit.client import PublicApiDepositClient class InputError(ValueError): """Input script error """ pass @contextmanager def trap_and_report_exceptions(): """Trap and report exceptions (InputError, MaintenanceError) in a unified way. """ from swh.deposit.client import MaintenanceError try: yield except InputError as e: logger.error("Problem during parsing options: %s", e) sys.exit(1) except MaintenanceError as e: logger.error(e) sys.exit(1) def _url(url: str) -> str: """Force the /1 api version at the end of the url (avoiding confusing issues without it). 
Args: url (str): api url used by cli users Returns: Top level api url to actually request """ if not url.endswith("/1"): url = "%s/1" % url return url def generate_metadata( deposit_client: str, name: str, authors: List[str], external_id: Optional[str] = None, create_origin: Optional[str] = None, ) -> str: """Generate sword compliant xml metadata with the minimum required metadata. The Atom spec, https://tools.ietf.org/html/rfc4287, says that: - atom:entry elements MUST contain one or more atom:author elements - atom:entry elements MUST contain exactly one atom:title element. - atom:entry elements MUST contain exactly one atom:updated element. However, we are also using CodeMeta, so we want some basic information to be mandatory. Therefore, we generate the following mandatory fields: - http://www.w3.org/2005/Atom#updated - http://www.w3.org/2005/Atom#author - http://www.w3.org/2005/Atom#title - https://doi.org/10.5063/SCHEMA/CODEMETA-2.0#name (yes, in addition to http://www.w3.org/2005/Atom#title, even if they have somewhat the same meaning) - https://doi.org/10.5063/SCHEMA/CODEMETA-2.0#author Args: deposit_client: Deposit client username, name: Software name authors: List of author names create_origin: Origin concerned by the deposit Returns: metadata xml string """ import xmltodict # generate a metadata file with the minimum required metadata document = { "atom:entry": { "@xmlns:atom": "http://www.w3.org/2005/Atom", "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0", "atom:updated": datetime.now(tz=timezone.utc), # mandatory, cf. docstring "atom:author": deposit_client, # mandatory, cf. docstring "atom:title": name, # mandatory, cf. docstring "codemeta:name": name, # mandatory, cf. docstring "codemeta:author": [ # mandatory, cf. 
docstring {"codemeta:name": author_name} for author_name in authors ], }, } if external_id: document["atom:entry"]["codemeta:identifier"] = external_id if create_origin: document["atom:entry"][ "@xmlns:swh" ] = "https://www.softwareheritage.org/schema/2018/deposit" document["atom:entry"]["swh:deposit"] = { "swh:create_origin": {"swh:origin": {"@url": create_origin}} } logging.debug("Atom entry dict to generate as xml: %s", document) return xmltodict.unparse(document, pretty=True) def _collection(client: PublicApiDepositClient) -> str: """Retrieve the client's collection """ # retrieve user's collection sd_content = client.service_document() if "error" in sd_content: msg = sd_content["error"] raise InputError(f"Service document retrieval: {msg}") collection = sd_content["app:service"]["app:workspace"]["app:collection"][ "sword:name" ] return collection def client_command_parse_input( client, username: str, archive: Optional[str], metadata: Optional[str], collection: Optional[str], slug: Optional[str], create_origin: Optional[str], partial: bool, deposit_id: Optional[int], swhid: Optional[str], replace: bool, url: str, name: Optional[str], authors: List[str], temp_dir: str, ) -> Dict[str, Any]: """Parse the client subcommand options and make sure the combination is acceptable*. If not, an InputError exception is raised explaining the issue. By acceptable, we mean: - A multipart deposit (create or update) requires: - an existing software archive - an existing metadata file or author(s) and name provided in params - A binary deposit (create/update) requires an existing software archive - A metadata deposit (create/update) requires an existing metadata file or author(s) and name provided in params - A deposit update requires a deposit_id This will not prevent all failure cases though. The remaining errors are already dealt with by the underlying api client. 
Raises: InputError explaining the user input related issue MaintenanceError explaining the api status Returns: dict with the following keys: "archive": the software archive to deposit "username": username "metadata": the metadata file to deposit "collection": the user's collection under which to put the deposit "create_origin": the origin concerned by the deposit "in_progress": if the deposit is partial or not "url": deposit's server main entry point "deposit_id": optional deposit identifier "swhid": optional deposit swhid "replace": whether the given deposit is to be replaced or not """ if not metadata: if name and authors: metadata_path = os.path.join(temp_dir, "metadata.xml") logging.debug("Temporary file: %s", metadata_path) metadata_xml = generate_metadata( username, name, authors, external_id=slug, create_origin=create_origin ) logging.debug("Metadata xml generated: %s", metadata_xml) with open(metadata_path, "w") as f: f.write(metadata_xml) metadata = metadata_path elif archive is not None and not partial and not deposit_id: # If we meet all the following conditions: # * this is not an archive-only deposit request # * it is not part of a multipart deposit (either create/update # or finish) # * it misses either name or authors raise InputError( "For metadata deposit request, either a metadata file with " "--metadata or both --author and --name must be provided. " ) elif name or authors: # If we are generating metadata, then all mandatory metadata # must be present raise InputError( "For metadata deposit request, either a metadata file with " "--metadata or both --author and --name must be provided." ) else: # TODO: this is a multipart deposit, we might want to check that # metadata are deposited at some point pass elif name or authors or create_origin: raise InputError( "Using --metadata flag is incompatible with " "--author and --name and --create-origin (those are used to generate one " "metadata file)." 
) if not archive and not metadata: raise InputError( "Please provide an actionable command. See --help for more information" ) if metadata: from swh.deposit.utils import parse_xml metadata_raw = open(metadata, "r").read() metadata_dict = parse_xml(metadata_raw).get("swh:deposit", {}) if ( "swh:create_origin" not in metadata_dict and "swh:add_to_origin" not in metadata_dict ): logger.warning( "The metadata file provided should contain " '"" or "" tag', ) if replace and not deposit_id: raise InputError("To update an existing deposit, you must provide its id") if not collection: collection = _collection(client) return { "archive": archive, "username": username, "metadata": metadata, "collection": collection, "slug": slug, "in_progress": partial, "url": url, "deposit_id": deposit_id, "swhid": swhid, "replace": replace, } def _subdict(d: Dict[str, Any], keys: Collection[str]) -> Dict[str, Any]: "return a dict from d with only given keys" return {k: v for k, v in d.items() if k in keys} def credentials_decorator(f): """Add default --url, --username and --password flag to cli. """ f = click.option( "--password", required=True, help="(Mandatory) User's associated password" )(f) f = click.option("--username", required=True, help="(Mandatory) User's name")(f) f = click.option( "--url", default="https://deposit.softwareheritage.org", help=( "(Optional) Deposit server api endpoint. By default, " "https://deposit.softwareheritage.org/1" ), )(f) return f def output_format_decorator(f): """Add --format output flag decorator to cli. """ return click.option( "-f", "--format", "output_format", default="logging", type=click.Choice(["logging", "yaml", "json"]), help="Output format results.", )(f) @deposit.command() @credentials_decorator @click.option( "--archive", type=click.Path(exists=True), help="(Optional) Software archive to deposit", ) @click.option( "--metadata", type=click.Path(exists=True), help=( "(Optional) Path to xml metadata file. 
If not provided, " "this will use a file named .metadata.xml" ), ) @click.option( "--archive-deposit/--no-archive-deposit", default=False, help="Deprecated (ignored)", ) @click.option( "--metadata-deposit/--no-metadata-deposit", default=False, help="Deprecated (ignored)", ) @click.option( "--collection", help="(Optional) User's collection. If not provided, this will be fetched.", ) @click.option( "--slug", help=( "(Deprecated) (Optional) External system information identifier. " "If not provided, it will be generated" ), ) @click.option( "--create-origin", help=( "(Optional) Origin url to attach information to. To be used alongside " "--name and --author. This will be generated alongside the metadata to " "provide to the deposit server." ), ) @click.option( "--partial/--no-partial", default=False, help=( "(Optional) The deposit will be partial, other deposits " "will have to take place to finalize it." ), ) @click.option( "--deposit-id", default=None, help="(Optional) Update an existing partial deposit with its identifier", ) @click.option( "--swhid", default=None, help="(Optional) Update existing completed deposit (status done) with new metadata", ) @click.option( "--replace/--no-replace", default=False, help="(Optional) Update by replacing existing metadata to a deposit", ) @click.option("--verbose/--no-verbose", default=False, help="Verbose mode") @click.option("--name", help="Software name") @click.option( "--author", multiple=True, help="Software author(s), this can be repeated as many times" " as there are authors", ) @output_format_decorator @click.pass_context def upload( ctx, username: str, password: str, archive: Optional[str], metadata: Optional[str], archive_deposit: bool, metadata_deposit: bool, collection: Optional[str], slug: Optional[str], create_origin: Optional[str], partial: bool, deposit_id: Optional[int], swhid: Optional[str], replace: bool, url: str, verbose: bool, name: Optional[str], author: List[str], output_format: Optional[str], ): 
"""Software Heritage Public Deposit Client Create/Update deposit through the command line. More documentation can be found at https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html. """ import tempfile from swh.deposit.client import PublicApiDepositClient if archive_deposit or metadata_deposit: warnings.warn( '"archive_deposit" and "metadata_deposit" option arguments are ' "deprecated and have no effect; simply do not provide the archive " "for a metadata-only deposit, and do not provide a metadata for a" "archive-only deposit.", DeprecationWarning, ) if slug: if create_origin and slug != create_origin: raise InputError( '"--slug" flag has been deprecated in favor of "--create-origin" flag. ' "You mentioned both with different values, please only " 'use "--create-origin".' ) warnings.warn( '"--slug" flag has been deprecated in favor of "--create-origin" flag. ' 'Please, start using "--create-origin" instead of "--slug"', DeprecationWarning, ) url = _url(url) client = PublicApiDepositClient(url=url, auth=(username, password)) with tempfile.TemporaryDirectory() as temp_dir: with trap_and_report_exceptions(): logger.debug("Parsing cli options") config = client_command_parse_input( client, username, archive, metadata, collection, slug, create_origin, partial, deposit_id, swhid, replace, url, name, author, temp_dir, ) if verbose: logger.info("Parsed configuration: %s", config) keys = [ "archive", "collection", "in_progress", "metadata", "slug", ] if config["deposit_id"]: keys += ["deposit_id", "replace", "swhid"] data = client.deposit_update(**_subdict(config, keys)) else: data = client.deposit_create(**_subdict(config, keys)) print_result(data, output_format) @deposit.command() @credentials_decorator @click.option("--deposit-id", default=None, required=True, help="Deposit identifier.") @output_format_decorator @click.pass_context def status(ctx, url, username, password, deposit_id, output_format): """Deposit's status """ from swh.deposit.client import 
PublicApiDepositClient url = _url(url) logger.debug("Status deposit") with trap_and_report_exceptions(): - client = PublicApiDepositClient(url=url, auth=(username, password)) + client = PublicApiDepositClient(url=_url(url), auth=(username, password)) collection = _collection(client) print_result( client.deposit_status(collection=collection, deposit_id=deposit_id), output_format, ) def print_result(data: Dict[str, Any], output_format: Optional[str]) -> None: """Display the result data into a dedicated output format. """ import json import yaml if output_format == "json": click.echo(json.dumps(data)) elif output_format == "yaml": click.echo(yaml.dump(data)) else: logger.info(data) @deposit.command("metadata-only") @credentials_decorator @click.option( "--metadata", "metadata_path", type=click.Path(exists=True), required=True, help="Path to xml metadata file", ) @output_format_decorator @click.pass_context def metadata_only(ctx, url, username, password, metadata_path, output_format): """Deposit metadata only upload """ from swh.deposit.client import PublicApiDepositClient from swh.deposit.utils import parse_swh_reference, parse_xml # Parse to check for a swhid presence within the metadata file with open(metadata_path, "r") as f: metadata_raw = f.read() actual_swhid = parse_swh_reference(parse_xml(metadata_raw)) if not actual_swhid: raise InputError("A SWHID must be provided for a metadata-only deposit") with trap_and_report_exceptions(): client = PublicApiDepositClient(url=_url(url), auth=(username, password)) collection = _collection(client) result = client.deposit_metadata_only(collection, metadata_path) print_result(result, output_format) + + +@deposit.command("list") +@credentials_decorator +@output_format_decorator +@click.option( + "--page", default=1, help="Page number when requesting more information", +) +@click.option( + "--page-size", default=100, help="Page number when requesting more information", +) +@click.pass_context +def deposit_list(ctx, url, 
username, password, output_format, page, page_size): + """Deposit metadata only upload + + """ + from swh.deposit.client import PublicApiDepositClient + + url = _url(url) + logger.debug("List deposits for user %s", username) + with trap_and_report_exceptions(): + client = PublicApiDepositClient(url=_url(url), auth=(username, password)) + collection = _collection(client) + result = client.deposit_list(collection, page=page, page_size=page_size) + + print_result(result, output_format) diff --git a/swh/deposit/client.py b/swh/deposit/client.py index e9df16f3..50255f0f 100644 --- a/swh/deposit/client.py +++ b/swh/deposit/client.py @@ -1,748 +1,846 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Module in charge of defining an swh-deposit client """ import hashlib import logging import os from typing import Any, Dict, Optional, Tuple from urllib.parse import urljoin import warnings import requests +from requests import Response +from requests.utils import parse_header_links from swh.core.config import load_from_envvar from swh.deposit import __version__ as swh_deposit_version from swh.deposit.utils import parse_xml logger = logging.getLogger(__name__) def compute_unified_information( collection: str, in_progress: bool, slug: str, *, filepath: Optional[str] = None, swhid: Optional[str] = None, **kwargs, ) -> Dict[str, Any]: """Given a filepath, compute necessary information on that file. Args: collection: Deposit collection in_progress: do we finalize the deposit? slug: external id to use filepath: Path to the file to compute the necessary information out of swhid: Deposit swhid if any Returns: dict with keys: 'slug': external id to use 'in_progress': do we finalize the deposit? 
'content-type': content type associated 'md5sum': md5 sum 'filename': filename 'filepath': filepath 'swhid': deposit swhid """ result: Dict[str, Any] = { "slug": slug, "in_progress": in_progress, "swhid": swhid, } content_type: Optional[str] = None md5sum: Optional[str] = None if filepath: filename = os.path.basename(filepath) md5sum = hashlib.md5(open(filepath, "rb").read()).hexdigest() extension = filename.split(".")[-1] if "zip" in extension: content_type = "application/zip" else: content_type = "application/x-tar" result.update( { "content-type": content_type, "md5sum": md5sum, "filename": filename, "filepath": filepath, } ) return result class MaintenanceError(ValueError): """Informational maintenance error exception """ pass def handle_deprecated_config(config: Dict) -> Tuple[str, Optional[Tuple[str, str]]]: warnings.warn( '"config" argument is deprecated, please ' 'use "url" and "auth" arguments instead; note that "auth" ' "expects now a couple (username, password) and not a dict.", DeprecationWarning, ) url: str = config["url"] auth: Optional[Tuple[str, str]] = None if config.get("auth"): auth = (config["auth"]["username"], config["auth"]["password"]) return (url, auth) class BaseApiDepositClient: """Deposit client base class """ def __init__( self, config: Optional[Dict] = None, url: Optional[str] = None, auth: Optional[Tuple[str, str]] = None, ): if not url and not config: config = load_from_envvar() if config: url, auth = handle_deprecated_config(config) # needed to help mypy not be fooled by the Optional nature of url assert url is not None self.base_url = url.strip("/") + "/" self.auth = auth self.session = requests.Session() if auth: self.session.auth = auth self.session.headers.update( {"user-agent": f"swh-deposit/{swh_deposit_version}"} ) def do(self, method, url, *args, **kwargs): """Internal method to deal with requests, possibly with basic http authentication. 
Args: method (str): supported http methods as in self._methods' keys Returns: The request's execution """ full_url = urljoin(self.base_url, url.lstrip("/")) return self.session.request(method, full_url, *args, **kwargs) class PrivateApiDepositClient(BaseApiDepositClient): """Private API deposit client to: - read a given deposit's archive(s) - read a given deposit's metadata - update a given deposit's status """ - def archive_get(self, archive_update_url, archive): + def archive_get(self, archive_update_url: str, archive: str) -> Optional[str]: """Retrieve the archive from the deposit to a local directory. Args: archive_update_url (str): The full deposit archive(s)'s raw content to retrieve locally archive (str): the local archive's path where to store the raw content Returns: The archive path to the local archive to load. Or None if any problem arose. """ - r = self.do("get", archive_update_url, stream=True) - if r.ok: + response = self.do("get", archive_update_url, stream=True) + if response.ok: with open(archive, "wb") as f: - for chunk in r.iter_content(): + for chunk in response.iter_content(): f.write(chunk) return archive msg = "Problem when retrieving deposit archive at %s" % (archive_update_url,) logger.error(msg) raise ValueError(msg) def metadata_get(self, metadata_url): """Retrieve the metadata information on a given deposit. Args: metadata_url (str): The full deposit metadata url to retrieve locally Returns: The dictionary of metadata for that deposit or None if any problem arose. """ r = self.do("get", metadata_url) if r.ok: return r.json() msg = "Problem when retrieving metadata at %s" % metadata_url logger.error(msg) raise ValueError(msg) def status_update( self, update_status_url, status, revision_id=None, directory_id=None, origin_url=None, ): """Update the deposit's status. 
Args: update_status_url (str): the full deposit's archive status (str): The status to update the deposit with revision_id (str/None): the revision's identifier to update to directory_id (str/None): the directory's identifier to update to origin_url (str/None): deposit's associated origin url """ payload = {"status": status} if revision_id: payload["revision_id"] = revision_id if directory_id: payload["directory_id"] = directory_id if origin_url: payload["origin_url"] = origin_url self.do("put", update_status_url, json=payload) def check(self, check_url): """Check the deposit's associated data (metadata, archive(s)) Args: check_url (str): the full deposit's check url """ r = self.do("get", check_url) if r.ok: data = r.json() return data["status"] msg = "Problem when checking deposit %s" % check_url logger.error(msg) raise ValueError(msg) class BaseDepositClient(BaseApiDepositClient): """Base Deposit client to access the public api. """ def __init__( self, config=None, url=None, auth=None, error_msg=None, empty_result={} ): super().__init__(url=url, auth=auth, config=config) self.error_msg = error_msg self.empty_result = empty_result def compute_url(self, *args, **kwargs): """Compute api url endpoint to query.""" raise NotImplementedError def compute_method(self, *args, **kwargs): """Http method to use on the url""" raise NotImplementedError - def parse_result_ok(self, xml_content): + def parse_result_ok( + self, xml_content: str, headers: Optional[Dict] = None + ) -> Dict[str, Any]: """Given an xml result from the api endpoint, parse it and returns a dict. """ raise NotImplementedError def compute_information(self, *args, **kwargs) -> Dict[str, Any]: """Compute some more information given the inputs (e.g http headers, ...) """ return {} - def parse_result_error(self, xml_content: bytes) -> Dict: + def parse_result_error(self, xml_content: str) -> Dict[str, Any]: """Given an error response in xml, parse it into a dict. 
Returns: dict with following keys: 'error': The error message 'detail': Some more detail about the error if any """ data = parse_xml(xml_content) sword_error = data["sword:error"] return { "summary": sword_error["atom:summary"], "detail": sword_error.get("detail", ""), "sword:verboseDescription": sword_error.get("sword:verboseDescription", ""), } - def do_execute(self, method, url, info): + def do_execute(self, method: str, url: str, info: Dict, **kwargs) -> Response: """Execute the http query to url using method and info information. - By default, execute a simple query to url with the http - method. Override this in daughter class to improve the - default behavior if needed. + By default, execute a simple query to url with the http method. Override this in + subclass to improve the default behavior if needed. """ - return self.do(method, url) + return self.do(method, url, **kwargs) + + def compute_params(self, **kwargs) -> Dict[str, Any]: + """Determine the params out of the kwargs""" + return {} def execute(self, *args, **kwargs) -> Dict[str, Any]: """Main endpoint to prepare and execute the http query to the api. Raises: MaintenanceError if some api maintenance is happening. 
Returns: Dict of computed api data """ url = self.compute_url(*args, **kwargs) method = self.compute_method(*args, **kwargs) info = self.compute_information(*args, **kwargs) + params = self.compute_params(**kwargs) try: - r = self.do_execute(method, url, info) + response = self.do_execute(method, url, info, params=params) except Exception as e: msg = self.error_msg % (url, e) - r = self.empty_result - r.update( + result = self.empty_result + result.update( {"error": msg,} ) - return r + return result else: - if r.ok: - if int(r.status_code) == 204: # 204 returns no body - return {"status": r.status_code} + if response.ok: + if int(response.status_code) == 204: # 204 returns no body + return {"status": response.status_code} else: - return self.parse_result_ok(r.text) + headers = dict(response.headers) if response.headers else None + return self.parse_result_ok(response.text, headers) else: - error = self.parse_result_error(r.text) + error = self.parse_result_error(response.text) empty = self.empty_result error.update(empty) - if r.status_code == 503: + if response.status_code == 503: summary = error.get("summary") detail = error.get("sword:verboseDescription") # Maintenance error if summary and detail: raise MaintenanceError(f"{summary}: {detail}") error.update( - {"status": r.status_code,} + {"status": response.status_code,} ) return error class ServiceDocumentDepositClient(BaseDepositClient): """Service Document information retrieval. """ def __init__(self, config=None, url=None, auth=None): super().__init__( url=url, auth=auth, config=config, error_msg="Service document failure at %s: %s", empty_result={"collection": None}, ) def compute_url(self, *args, **kwargs): return "/servicedocument/" def compute_method(self, *args, **kwargs): return "get" - def parse_result_ok(self, xml_content): + def parse_result_ok( + self, xml_content: str, headers: Optional[Dict] = None + ) -> Dict[str, Any]: """Parse service document's success response. 
""" return parse_xml(xml_content) - def parse_result_error(self, xml_content: bytes) -> Dict: + def parse_result_error(self, xml_content: str) -> Dict[str, Any]: result = super().parse_result_error(xml_content) return {"error": result["summary"]} class StatusDepositClient(BaseDepositClient): """Status information on a deposit. """ def __init__(self, config=None, url=None, auth=None): super().__init__( url=url, auth=auth, config=config, error_msg="Status check failure at %s: %s", empty_result={ "deposit_status": None, "deposit_status_detail": None, "deposit_swh_id": None, }, ) def compute_url(self, collection, deposit_id): return "/%s/%s/status/" % (collection, deposit_id) def compute_method(self, *args, **kwargs): return "get" - def parse_result_ok(self, xml_content): + def parse_result_ok( + self, xml_content: str, headers: Optional[Dict] = None + ) -> Dict[str, Any]: """Given an xml content as string, returns a deposit dict. """ data = parse_xml(xml_content) keys = [ "deposit_id", "deposit_status", "deposit_status_detail", "deposit_swh_id", "deposit_swh_id_context", "deposit_external_id", ] return {key: data.get("swh:" + key) for key in keys} +class CollectionListDepositClient(BaseDepositClient): + """List a collection of deposits (owned by a user) + + """ + + def __init__(self, config=None, url=None, auth=None): + super().__init__( + url=url, + auth=auth, + config=config, + error_msg="List deposits failure at %s: %s", + empty_result={}, + ) + + def compute_url(self, collection, **kwargs): + return f"/{collection}/" + + def compute_method(self, *args, **kwargs): + return "get" + + def compute_params(self, **kwargs) -> Dict[str, Any]: + """Transmit pagination params if values provided are not None + (e.g. page, page_size) + + """ + return {k: v for k, v in kwargs.items() if v is not None} + + def parse_result_ok( + self, xml_content: str, headers: Optional[Dict] = None + ) -> Dict[str, Any]: + """Given an xml content as string, returns a deposit dict. 
+ + """ + link_header = headers.get("Link", "") if headers else "" + links = parse_header_links(link_header) + data = parse_xml(xml_content)["atom:feed"] + total_result = data.get("swh:count", 0) + keys = [ + "id", + "reception_date", + "complete_date", + "external_id", + "swhid", + "status", + "status_detail", + "swhid_context", + "origin_url", + ] + entries_ = data.get("atom:entry", []) + entries = [entries_] if isinstance(entries_, dict) else entries_ + deposits_d = [ + { + key: deposit.get(f"swh:{key}") + for key in keys + if deposit.get(f"swh:{key}") is not None + } + for deposit in entries + ] + + return { + "count": total_result, + "deposits": deposits_d, + **{entry["rel"]: entry["url"] for entry in links}, + } + + class BaseCreateDepositClient(BaseDepositClient): """Deposit client base class to post new deposit. """ def __init__(self, config=None, url=None, auth=None): super().__init__( url=url, auth=auth, config=config, error_msg="Post Deposit failure at %s: %s", empty_result={"swh:deposit_id": None, "swh:deposit_status": None,}, ) def compute_url(self, collection, *args, **kwargs): return "/%s/" % collection def compute_method(self, *args, **kwargs): return "post" - def parse_result_ok(self, xml_content): + def parse_result_ok( + self, xml_content: str, headers: Optional[Dict] = None + ) -> Dict[str, Any]: """Given an xml content as string, returns a deposit dict. 
""" data = parse_xml(xml_content) keys = [ "deposit_id", "deposit_status", "deposit_status_detail", "deposit_date", ] return {key: data.get("swh:" + key) for key in keys} def compute_headers(self, info: Dict[str, Any]) -> Dict[str, Any]: return info - def do_execute(self, method, url, info): + def do_execute(self, method, url, info, **kwargs): with open(info["filepath"], "rb") as f: return self.do(method, url, data=f, headers=info["headers"]) class CreateArchiveDepositClient(BaseCreateDepositClient): """Post an archive (binary) deposit client.""" def compute_headers(self, info): headers = { "CONTENT_MD5": info["md5sum"], "IN-PROGRESS": str(info["in_progress"]), "CONTENT-TYPE": info["content-type"], "CONTENT-DISPOSITION": "attachment; filename=%s" % (info["filename"],), } if "slug" in info: headers["SLUG"] = info["slug"] return headers def compute_information(self, *args, **kwargs) -> Dict[str, Any]: info = compute_unified_information( *args, filepath=kwargs["archive_path"], **kwargs ) info["headers"] = self.compute_headers(info) return info class UpdateArchiveDepositClient(CreateArchiveDepositClient): """Update (add/replace) an archive (binary) deposit client.""" def compute_url(self, collection, *args, deposit_id=None, **kwargs): return "/%s/%s/media/" % (collection, deposit_id) def compute_method(self, *args, replace=False, **kwargs): return "put" if replace else "post" class CreateMetadataDepositClient(BaseCreateDepositClient): """Post a metadata deposit client.""" def compute_headers(self, info): headers = { "IN-PROGRESS": str(info["in_progress"]), "CONTENT-TYPE": "application/atom+xml;type=entry", } if "slug" in info: headers["SLUG"] = info["slug"] return headers def compute_information(self, *args, **kwargs) -> Dict[str, Any]: info = compute_unified_information( *args, filepath=kwargs["metadata_path"], **kwargs ) info["headers"] = self.compute_headers(info) return info class UpdateMetadataOnPartialDepositClient(CreateMetadataDepositClient): """Update 
(add/replace) metadata on partial deposit scenario.""" def compute_url(self, collection, *args, deposit_id=None, **kwargs): return f"/{collection}/{deposit_id}/metadata/" def compute_method(self, *args, replace: bool = False, **kwargs) -> str: return "put" if replace else "post" class UpdateMetadataOnDoneDepositClient(CreateMetadataDepositClient): """Update metadata on "done" deposit. This requires the deposit swhid.""" def compute_url(self, collection, *args, deposit_id=None, **kwargs): return f"/{collection}/{deposit_id}/atom/" def compute_headers(self, info: Dict[str, Any]) -> Dict[str, Any]: return { "CONTENT-TYPE": "application/atom+xml;type=entry", "X_CHECK_SWHID": info["swhid"], } def compute_method(self, *args, **kwargs) -> str: return "put" class CreateMetadataOnlyDepositClient(BaseCreateDepositClient): """Create metadata-only deposit.""" def compute_information(self, *args, **kwargs) -> Dict[str, Any]: return { "headers": {"CONTENT-TYPE": "application/atom+xml;type=entry",}, "filepath": kwargs["metadata_path"], } - def parse_result_ok(self, xml_content): + def parse_result_ok( + self, xml_content: str, headers: Optional[Dict] = None + ) -> Dict[str, Any]: """Given an xml content as string, returns a deposit dict. 
""" data = parse_xml(xml_content) keys = [ "deposit_id", "deposit_status", "deposit_date", ] return {key: data.get("swh:" + key) for key in keys} class CreateMultipartDepositClient(BaseCreateDepositClient): """Create a multipart deposit client.""" def _multipart_info(self, info, info_meta): files = [ ( "file", (info["filename"], open(info["filepath"], "rb"), info["content-type"]), ), ( "atom", ( info_meta["filename"], open(info_meta["filepath"], "rb"), "application/atom+xml", ), ), ] headers = { "CONTENT_MD5": info["md5sum"], "IN-PROGRESS": str(info["in_progress"]), } if "slug" in info: headers["SLUG"] = info["slug"] return files, headers def compute_information(self, *args, **kwargs) -> Dict[str, Any]: info = compute_unified_information(*args, filepath=kwargs["archive_path"],) info_meta = compute_unified_information( *args, filepath=kwargs["metadata_path"], ) files, headers = self._multipart_info(info, info_meta) return {"files": files, "headers": headers} - def do_execute(self, method, url, info): + def do_execute(self, method, url, info, **kwargs): return self.do(method, url, files=info["files"], headers=info["headers"]) class UpdateMultipartDepositClient(CreateMultipartDepositClient): """Update a multipart deposit client.""" def compute_url(self, collection, *args, deposit_id=None, **kwargs): return "/%s/%s/metadata/" % (collection, deposit_id) def compute_method(self, *args, replace=False, **kwargs): return "put" if replace else "post" class PublicApiDepositClient(BaseApiDepositClient): """Public api deposit client.""" def service_document(self): """Retrieve service document endpoint's information.""" return ServiceDocumentDepositClient(url=self.base_url, auth=self.auth).execute() def deposit_status(self, collection: str, deposit_id: int): """Retrieve status information on a deposit.""" return StatusDepositClient(url=self.base_url, auth=self.auth).execute( collection, deposit_id ) + def deposit_list( + self, + collection: str, + page: Optional[int] = None, + 
page_size: Optional[int] = None, + ): + """List deposits from the collection""" + return CollectionListDepositClient(url=self.base_url, auth=self.auth).execute( + collection, page=page, page_size=page_size + ) + def deposit_create( self, collection: str, slug: Optional[str], archive: Optional[str] = None, metadata: Optional[str] = None, in_progress: bool = False, ): """Create a new deposit (archive, metadata, both as multipart).""" if archive and not metadata: return CreateArchiveDepositClient( url=self.base_url, auth=self.auth ).execute(collection, in_progress, slug, archive_path=archive) elif not archive and metadata: return CreateMetadataDepositClient( url=self.base_url, auth=self.auth ).execute(collection, in_progress, slug, metadata_path=metadata) else: return CreateMultipartDepositClient( url=self.base_url, auth=self.auth ).execute( collection, in_progress, slug, archive_path=archive, metadata_path=metadata, ) def deposit_update( self, collection: str, deposit_id: int, slug: Optional[str], archive: Optional[str] = None, metadata: Optional[str] = None, in_progress: bool = False, replace: bool = False, swhid: Optional[str] = None, ): """Update (add/replace) existing deposit (archive, metadata, both).""" - r = self.deposit_status(collection, deposit_id) - if "error" in r: - return r + response = self.deposit_status(collection, deposit_id) + if "error" in response: + return response - status = r["deposit_status"] + status = response["deposit_status"] if swhid is None and status != "partial": return { "error": "You can only act on deposit with status 'partial'", "detail": f"The deposit {deposit_id} has status '{status}'", "deposit_status": status, "deposit_id": deposit_id, } if swhid is not None and status != "done": return { "error": "You can only update metadata on deposit with status 'done'", "detail": f"The deposit {deposit_id} has status '{status}'", "deposit_status": status, "deposit_id": deposit_id, } if archive and not metadata: - r = 
UpdateArchiveDepositClient(url=self.base_url, auth=self.auth).execute( + result = UpdateArchiveDepositClient( + url=self.base_url, auth=self.auth + ).execute( collection, in_progress, slug, deposit_id=deposit_id, archive_path=archive, replace=replace, ) elif not archive and metadata and swhid is None: - r = UpdateMetadataOnPartialDepositClient( + result = UpdateMetadataOnPartialDepositClient( url=self.base_url, auth=self.auth ).execute( collection, in_progress, slug, deposit_id=deposit_id, metadata_path=metadata, replace=replace, ) elif not archive and metadata and swhid is not None: - r = UpdateMetadataOnDoneDepositClient( + result = UpdateMetadataOnDoneDepositClient( url=self.base_url, auth=self.auth ).execute( collection, in_progress, slug, deposit_id=deposit_id, metadata_path=metadata, swhid=swhid, ) else: - r = UpdateMultipartDepositClient(url=self.base_url, auth=self.auth).execute( + result = UpdateMultipartDepositClient( + url=self.base_url, auth=self.auth + ).execute( collection, in_progress, slug, deposit_id=deposit_id, archive_path=archive, metadata_path=metadata, replace=replace, ) - if "error" in r: - return r + if "error" in result: + return result return self.deposit_status(collection, deposit_id) def deposit_metadata_only( self, collection: str, metadata: Optional[str] = None, ): assert metadata is not None return CreateMetadataOnlyDepositClient( url=self.base_url, auth=self.auth ).execute(collection, metadata_path=metadata) diff --git a/swh/deposit/templates/deposit/collection_list.xml b/swh/deposit/templates/deposit/collection_list.xml new file mode 100644 index 00000000..09664bf0 --- /dev/null +++ b/swh/deposit/templates/deposit/collection_list.xml @@ -0,0 +1,16 @@ + + {{ count }} + {% for deposit in results %} + + {% for key, value in deposit.items %} + {% if value is not None %} + {{ value }} + {% endif %} + {% endfor %} + + {% endfor %} + diff --git a/swh/deposit/tests/api/test_collection_list.py b/swh/deposit/tests/api/test_collection_list.py 
new file mode 100644 index 00000000..248216ae --- /dev/null +++ b/swh/deposit/tests/api/test_collection_list.py @@ -0,0 +1,113 @@ +# Copyright (C) 2017-2021 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from io import BytesIO + +from django.urls import reverse_lazy as reverse +from requests.utils import parse_header_links +from rest_framework import status + +from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL +from swh.deposit.models import DepositCollection +from swh.deposit.parsers import parse_xml + + +def test_deposit_collection_list_is_auth_protected(anonymous_client): + """Deposit list should require authentication + + """ + url = reverse(COL_IRI, args=("test",)) + response = anonymous_client.get(url) + assert response.status_code == status.HTTP_401_UNAUTHORIZED + assert b"protected by basic authentication" in response.content + + +def test_deposit_collection_list_collection_access_restricted_to_user_coll( + deposit_another_collection, deposit_user, authenticated_client +): + """Deposit list api should restrict access to user's collection + + """ + collection_id = authenticated_client.deposit_client.collections[0] + coll = DepositCollection.objects.get(pk=collection_id) + # authenticated_client has access to the "coll" collection + coll2 = deposit_another_collection + assert coll.name != coll2.name + # but does not have access to that coll2 collection + url = reverse(COL_IRI, args=(coll2.name,)) + response = authenticated_client.get(url) + # so it gets rejected access to the listing of that coll2 collection + assert response.status_code == status.HTTP_403_FORBIDDEN + msg = f"{deposit_user.username} cannot access collection {coll2.name}" + assert msg in response.content.decode("utf-8") + + +def test_deposit_collection_list_nominal( + 
partial_deposit, deposited_deposit, authenticated_client +): + """Deposit list api should return the user deposits in a paginated way + + """ + client_id = authenticated_client.deposit_client.id + assert partial_deposit.client.id == client_id + assert deposited_deposit.client.id == client_id + # Both deposit were deposited by the authenticated client + # so requesting the listing of the deposits, both should be listed + + deposit_id = str(partial_deposit.id) + deposit_id2 = str(deposited_deposit.id) + coll = partial_deposit.collection + # requesting the listing of the deposit for the user's collection + url = reverse(COL_IRI, args=(coll.name,)) + response = authenticated_client.get(f"{url}?page_size=1") + assert response.status_code == status.HTTP_200_OK + + data = parse_xml(BytesIO(response.content))["atom:feed"] + assert ( + data["swh:count"] == "2" + ) # total result of 2 deposits if consuming all results + header_link = parse_header_links(response._headers["Link"]) + assert len(header_link) == 1 # only 1 next link + expected_next = f"{url}?page=2&page_size=1" + assert header_link[0]["url"].endswith(expected_next) + assert header_link[0]["rel"] == "next" + + # only one deposit in the response + deposit = data["atom:entry"] # dict as only 1 value (a-la js) + assert isinstance(deposit, dict) + assert deposit["swh:id"] == deposit_id + assert deposit["swh:status"] == DEPOSIT_STATUS_PARTIAL + + # then 2nd page + response2 = authenticated_client.get(expected_next) + + assert response2.status_code == status.HTTP_200_OK + data2 = parse_xml(BytesIO(response2.content))["atom:feed"] + assert data2["swh:count"] == "2" # still total of 2 deposits across all results + + expected_previous = f"{url}?page_size=1" + header_link2 = parse_header_links(response2._headers["Link"]) + assert len(header_link2) == 1 # only 1 previous link + assert header_link2[0]["url"].endswith(expected_previous) + assert header_link2[0]["rel"] == "previous" + + # only 1 deposit in the response + 
deposit2 = data2["atom:entry"] # dict as only 1 value (a-la js) + assert isinstance(deposit2, dict) + assert deposit2["swh:id"] == deposit_id2 + assert deposit2["swh:status"] == DEPOSIT_STATUS_DEPOSITED + + # Retrieve every deposit in one query (no page_size parameter) + response3 = authenticated_client.get(url) + assert response3.status_code == status.HTTP_200_OK + data3 = parse_xml(BytesIO(response3.content))["atom:feed"] + assert data3["swh:count"] == "2" # total result of 2 deposits across all results + deposits3 = data3["atom:entry"] # list here + assert isinstance(deposits3, list) + assert len(deposits3) == 2 + header_link3 = parse_header_links(response3._headers["Link"]) + assert header_link3 == [] # no pagination as all results received in one round + assert deposit in deposits3 + assert deposit2 in deposits3 diff --git a/swh/deposit/tests/api/test_deposit_list.py b/swh/deposit/tests/api/test_deposit_private_list.py similarity index 91% rename from swh/deposit/tests/api/test_deposit_list.py rename to swh/deposit/tests/api/test_deposit_private_list.py index 013a6493..505da6ae 100644 --- a/swh/deposit/tests/api/test_deposit_list.py +++ b/swh/deposit/tests/api/test_deposit_private_list.py @@ -1,100 +1,97 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.urls import reverse_lazy as reverse from rest_framework import status from swh.deposit.api.converters import convert_status_detail from swh.deposit.config import ( DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, PRIVATE_LIST_DEPOSITS, ) STATUS_DETAIL = { "url": { "summary": "At least one compatible url field. 
Failed", "fields": ["testurl"], }, "metadata": [{"summary": "Mandatory fields missing", "fields": ["9", 10, 1.212],},], "archive": [ {"summary": "Invalid archive", "fields": ["3"],}, {"summary": "Unsupported archive", "fields": [2],}, ], } def test_deposit_list(partial_deposit, deposited_deposit, authenticated_client): """Deposit list api should return all deposits in a paginated way """ partial_deposit.status_detail = STATUS_DETAIL partial_deposit.save() deposit_id = partial_deposit.id deposit_id2 = deposited_deposit.id - # NOTE: does not work as documented - # https://docs.djangoproject.com/en/1.11/ref/urlresolvers/#django.core.urlresolvers.reverse # noqa - # url = reverse(PRIVATE_LIST_DEPOSITS, kwargs={'page_size': 1}) main_url = reverse(PRIVATE_LIST_DEPOSITS) url = "%s?page_size=1" % main_url response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK data = response.json() - assert data["count"] == 2 # 2 deposits + assert data["count"] == 2 # total result of 2 deposits if consuming all results expected_next = f"{main_url}?page=2&page_size=1" assert data["next"].endswith(expected_next) is True assert data["previous"] is None assert len(data["results"]) == 1 # page of size 1 deposit = data["results"][0] assert deposit["id"] == deposit_id assert deposit["status"] == DEPOSIT_STATUS_PARTIAL expected_status_detail = convert_status_detail(STATUS_DETAIL) assert deposit["status_detail"] == expected_status_detail # then 2nd page response2 = authenticated_client.get(expected_next) assert response2.status_code == status.HTTP_200_OK data2 = response2.json() - assert data2["count"] == 2 # still 2 deposits + assert data["count"] == 2 # total result of 2 deposits if consuming all results assert data2["next"] is None expected_previous = f"{main_url}?page_size=1" assert data2["previous"].endswith(expected_previous) is True assert len(data2["results"]) == 1 # page of size 1 deposit2 = data2["results"][0] assert deposit2["id"] == deposit_id2 assert 
deposit2["status"] == DEPOSIT_STATUS_DEPOSITED def test_deposit_list_exclude(partial_deposit, deposited_deposit, authenticated_client): """Exclusion pattern on external_id should be respected """ partial_deposit.status_detail = STATUS_DETAIL partial_deposit.save() main_url = reverse(PRIVATE_LIST_DEPOSITS) # Testing exclusion pattern exclude_pattern = "external-id" assert partial_deposit.external_id.startswith(exclude_pattern) assert deposited_deposit.external_id.startswith(exclude_pattern) url = f"{main_url}?page_size=1&exclude=external-id" response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK data = response.json() assert data["count"] == 0 url = "%s?page_size=1&exclude=dummy" % main_url # that won't exclude anything response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK data = response.json() assert data["count"] == 2 diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py index 4f0352e4..a11bbe27 100644 --- a/swh/deposit/tests/cli/test_client.py +++ b/swh/deposit/tests/cli/test_client.py @@ -1,958 +1,1033 @@ # Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import ast from collections import OrderedDict import contextlib import json import logging import os from unittest.mock import MagicMock import pytest import yaml from swh.deposit.api.checks import check_metadata from swh.deposit.cli import deposit as cli from swh.deposit.cli.client import InputError, _collection, _url, generate_metadata from swh.deposit.client import ( BaseDepositClient, MaintenanceError, PublicApiDepositClient, ServiceDocumentDepositClient, ) from swh.deposit.parsers import parse_xml from swh.model.exceptions import ValidationError from ..conftest import TEST_USER def generate_slug() -> 
str: """Generate a slug (sample purposes). """ import uuid return str(uuid.uuid4()) @pytest.fixture def datadir(request): """Override default datadir to target main test datadir""" return os.path.join(os.path.dirname(str(request.fspath)), "../data") @pytest.fixture def slug(): return generate_slug() @pytest.fixture def patched_tmp_path(tmp_path, mocker): mocker.patch( "tempfile.TemporaryDirectory", return_value=contextlib.nullcontext(str(tmp_path)), ) return tmp_path @pytest.fixture def client_mock_api_down(mocker, slug): """A mock client whose connection with api fails due to maintenance issue """ mock_client = MagicMock() mocker.patch("swh.deposit.client.PublicApiDepositClient", return_value=mock_client) mock_client.service_document.side_effect = MaintenanceError( "Database backend maintenance: Temporarily unavailable, try again later." ) return mock_client def test_cli_url(): assert _url("http://deposit") == "http://deposit/1" assert _url("https://other/1") == "https://other/1" def test_cli_collection_error(): mock_client = MagicMock() mock_client.service_document.return_value = {"error": "something went wrong"} with pytest.raises(InputError) as e: _collection(mock_client) assert "Service document retrieval: something went wrong" == str(e.value) def test_cli_collection_ok(requests_mock_datadir): client = PublicApiDepositClient( url="https://deposit.swh.test/1", auth=("test", "test") ) collection_name = _collection(client) assert collection_name == "test" def test_cli_collection_ko_because_downtime(): mock_client = MagicMock() mock_client.service_document.side_effect = MaintenanceError("downtime") with pytest.raises(MaintenanceError, match="downtime"): _collection(mock_client) def test_cli_upload_conflictual_flags( datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path, ): """Post metadata-only deposit through cli with invalid swhid raises """ api_url_basename = "deposit.test.metadataonly" metadata = atom_dataset["entry-data-minimal"] metadata_path = 
os.path.join(tmp_path, "entry-data-minimal.xml") with open(metadata_path, "w") as f: f.write(metadata) with pytest.raises(InputError, match="both with different values"): # fmt: off cli_runner.invoke( cli, [ "upload", "--url", f"https://{api_url_basename}/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--metadata", metadata_path, "--slug", "some-slug", # deprecated flag "--create-origin", "some-other-slug", # conflictual value, so raise "--format", "json", ], catch_exceptions=False, ) # fmt: on def test_cli_deposit_with_server_down_for_maintenance( sample_archive, caplog, client_mock_api_down, slug, patched_tmp_path, cli_runner ): """ Deposit failure due to maintenance down time should be explicit """ # fmt: off result = cli_runner.invoke( cli, [ "upload", "--url", "https://deposit.swh.test/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--name", "test-project", "--archive", sample_archive["path"], "--author", "Jane Doe", ], ) # fmt: on assert result.exit_code == 1, result.output assert result.output == "" down_for_maintenance_log_record = ( "swh.deposit.cli.client", logging.ERROR, "Database backend maintenance: Temporarily unavailable, try again later.", ) assert down_for_maintenance_log_record in caplog.record_tuples client_mock_api_down.service_document.assert_called_once_with() def test_cli_client_generate_metadata_ok(slug): """Generated metadata is well formed and pass service side metadata checks """ actual_metadata_xml = generate_metadata( "deposit-client", "project-name", authors=["some", "authors"], external_id="external-id", create_origin="origin-url", ) actual_metadata = dict(parse_xml(actual_metadata_xml)) assert actual_metadata["atom:author"] == "deposit-client" assert actual_metadata["atom:title"] == "project-name" assert actual_metadata["atom:updated"] is not None assert actual_metadata["codemeta:name"] == "project-name" assert actual_metadata["codemeta:identifier"] == "external-id" assert 
actual_metadata["codemeta:author"] == [ OrderedDict([("codemeta:name", "some")]), OrderedDict([("codemeta:name", "authors")]), ] assert ( actual_metadata["swh:deposit"]["swh:create_origin"]["swh:origin"]["@url"] == "origin-url" ) checks_ok, detail = check_metadata(actual_metadata) assert checks_ok is True assert detail is None def test_cli_client_generate_metadata_ok2(slug): """Generated metadata is well formed and pass service side metadata checks """ actual_metadata_xml = generate_metadata( "deposit-client", "project-name", authors=["some", "authors"], ) actual_metadata = dict(parse_xml(actual_metadata_xml)) assert actual_metadata["atom:author"] == "deposit-client" assert actual_metadata["atom:title"] == "project-name" assert actual_metadata["atom:updated"] is not None assert actual_metadata["codemeta:name"] == "project-name" assert actual_metadata["codemeta:author"] == [ OrderedDict([("codemeta:name", "some")]), OrderedDict([("codemeta:name", "authors")]), ] assert actual_metadata.get("codemeta:identifier") is None assert actual_metadata.get("swh:deposit") is None checks_ok, detail = check_metadata(actual_metadata) assert checks_ok is True assert detail is None def test_cli_single_minimal_deposit_with_slug( sample_archive, slug, patched_tmp_path, requests_mock_datadir, cli_runner, caplog, ): """ This ensure a single deposit upload through the cli is fine, cf. 
https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit """ # noqa metadata_path = os.path.join(patched_tmp_path, "metadata.xml") # fmt: off result = cli_runner.invoke( cli, [ "upload", "--url", "https://deposit.swh.test/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--name", "test-project", "--archive", sample_archive["path"], "--author", "Jane Doe", "--slug", slug, "--format", "json", ], ) # fmt: on assert result.exit_code == 0, result.output assert json.loads(result.output) == { "deposit_id": "615", "deposit_status": "partial", "deposit_status_detail": None, "deposit_date": "Oct. 8, 2020, 4:57 p.m.", } with open(metadata_path) as fd: actual_metadata = dict(parse_xml(fd.read())) assert actual_metadata["atom:author"] == TEST_USER["username"] assert actual_metadata["codemeta:name"] == "test-project" assert actual_metadata["atom:title"] == "test-project" assert actual_metadata["atom:updated"] is not None assert actual_metadata["codemeta:identifier"] == slug assert actual_metadata["codemeta:author"] == OrderedDict( [("codemeta:name", "Jane Doe")] ) count_warnings = 0 for (_, log_level, _) in caplog.record_tuples: count_warnings += 1 if log_level == logging.WARNING else 0 assert ( count_warnings == 1 ), "We should have 1 warning as we are using slug instead of create_origin" def test_cli_single_minimal_deposit_with_create_origin( sample_archive, slug, patched_tmp_path, requests_mock_datadir, cli_runner, caplog, ): """ This ensure a single deposit upload through the cli is fine, cf. 
https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit """ # noqa metadata_path = os.path.join(patched_tmp_path, "metadata.xml") origin = slug # fmt: off result = cli_runner.invoke( cli, [ "upload", "--url", "https://deposit.swh.test/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--name", "test-project", "--archive", sample_archive["path"], "--author", "Jane Doe", "--create-origin", origin, "--format", "json", ], ) # fmt: on assert result.exit_code == 0, result.output assert json.loads(result.output) == { "deposit_id": "615", "deposit_status": "partial", "deposit_status_detail": None, "deposit_date": "Oct. 8, 2020, 4:57 p.m.", } with open(metadata_path) as fd: actual_metadata = dict(parse_xml(fd.read())) assert actual_metadata["atom:author"] == TEST_USER["username"] assert actual_metadata["codemeta:name"] == "test-project" assert actual_metadata["atom:title"] == "test-project" assert actual_metadata["atom:updated"] is not None assert ( actual_metadata["swh:deposit"]["swh:create_origin"]["swh:origin"]["@url"] == origin ) assert actual_metadata["codemeta:author"] == OrderedDict( [("codemeta:name", "Jane Doe")] ) count_warnings = 0 for (_, log_level, _) in caplog.record_tuples: count_warnings += 1 if log_level == logging.WARNING else 0 assert ( count_warnings == 0 ), "We should have no warning as we are using create_origin" def test_cli_validation_metadata( sample_archive, caplog, patched_tmp_path, cli_runner, slug ): """Multiple metadata flags scenario (missing, conflicts) properly fails the calls """ metadata_path = os.path.join(patched_tmp_path, "metadata.xml") with open(metadata_path, "a"): pass # creates the file for flag_title_or_name, author_or_name in [ ("--author", "no one"), ("--name", "test-project"), ]: # Test missing author then missing name # fmt: off result = cli_runner.invoke( cli, [ "upload", "--url", "https://deposit.swh.test/1", "--username", TEST_USER["username"], "--password", 
TEST_USER["password"], "--archive", sample_archive["path"], "--slug", slug, flag_title_or_name, author_or_name, ], ) # fmt: on assert result.exit_code == 1, f"unexpected result: {result.output}" assert result.output == "" expected_error_log_record = ( "swh.deposit.cli.client", logging.ERROR, ( "Problem during parsing options: " "For metadata deposit request, either a metadata file with " "--metadata or both --author and --name must be provided. " ), ) assert expected_error_log_record in caplog.record_tuples # Clear mocking state caplog.clear() # incompatible flags: Test both --metadata and --author, then --metadata and # --name # fmt: off result = cli_runner.invoke( cli, [ "upload", "--url", "https://deposit.swh.test/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--name", "test-project", "--deposit-id", 666, "--archive", sample_archive["path"], "--slug", slug, ], ) # fmt: on assert result.exit_code == 1, f"unexpected result: {result.output}" assert result.output == "" expected_error_log_record = ( "swh.deposit.cli.client", logging.ERROR, ( "Problem during parsing options: " "For metadata deposit request, either a metadata file with " "--metadata or both --author and --name must be provided." 
), ) assert expected_error_log_record in caplog.record_tuples # Clear mocking state caplog.clear() # incompatible flags check (Test both --metadata and --author, # then --metadata and --name) # fmt: off result = cli_runner.invoke( cli, [ "upload", "--url", "https://deposit.swh.test/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--archive", sample_archive["path"], "--metadata", metadata_path, "--author", "Jane Doe", "--slug", slug, ], ) # fmt: on assert result.exit_code == 1, result.output assert result.output == "" expected_error_log_record = ( "swh.deposit.cli.client", logging.ERROR, ( "Problem during parsing options: " "Using --metadata flag is incompatible with --author " "and --name and --create-origin (those are used to generate " "one metadata file)." ), ) assert expected_error_log_record in caplog.record_tuples caplog.clear() def test_cli_validation_no_actionable_command(caplog, cli_runner): """Multiple metadata flags scenario (missing, conflicts) properly fails the calls """ # no actionable command # fmt: off result = cli_runner.invoke( cli, [ "upload", "--url", "https://deposit.swh.test/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--partial", ], ) # fmt: on assert result.exit_code == 1, result.output assert result.output == "" expected_error_log_record = ( "swh.deposit.cli.client", logging.ERROR, ( "Problem during parsing options: " "Please provide an actionable command. 
See --help for more information" ), ) assert expected_error_log_record in caplog.record_tuples def test_cli_validation_replace_with_no_deposit_id_fails( sample_archive, caplog, patched_tmp_path, requests_mock_datadir, datadir, cli_runner ): """--replace flags require --deposit-id otherwise fails """ metadata_path = os.path.join(datadir, "atom", "entry-data-deposit-binary.xml") # fmt: off result = cli_runner.invoke( cli, [ "upload", "--url", "https://deposit.swh.test/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--metadata", metadata_path, "--archive", sample_archive["path"], "--replace", ], ) # fmt: on assert result.exit_code == 1, result.output assert result.output == "" expected_error_log_record = ( "swh.deposit.cli.client", logging.ERROR, ( "Problem during parsing options: " "To update an existing deposit, you must provide its id" ), ) assert expected_error_log_record in caplog.record_tuples def test_cli_single_deposit_slug_generation( sample_archive, patched_tmp_path, requests_mock_datadir, cli_runner ): """Single deposit scenario without providing the slug, it should not be generated. """ metadata_path = os.path.join(patched_tmp_path, "metadata.xml") # fmt: off result = cli_runner.invoke( cli, [ "upload", "--url", "https://deposit.swh.test/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--name", "test-project", "--archive", sample_archive["path"], "--author", "Jane Doe", "--format", "json", ], ) # fmt: on assert result.exit_code == 0, result.output assert json.loads(result.output) == { "deposit_id": "615", "deposit_status": "partial", "deposit_status_detail": None, "deposit_date": "Oct. 
8, 2020, 4:57 p.m.", } with open(metadata_path) as fd: metadata_xml = fd.read() actual_metadata = dict(parse_xml(metadata_xml)) assert "codemeta:identifier" not in actual_metadata def test_cli_multisteps_deposit( sample_archive, datadir, slug, requests_mock_datadir, cli_runner ): """ First deposit a partial deposit (no metadata, only archive), then update the metadata part. https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#multisteps-deposit """ # noqa api_url = "https://deposit.test.metadata/1" deposit_id = 666 # Create a partial deposit with only 1 archive # fmt: off result = cli_runner.invoke( cli, [ "upload", "--url", api_url, "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--archive", sample_archive["path"], "--slug", slug, "--format", "json", "--partial", ], ) # fmt: on assert result.exit_code == 0, f"unexpected output: {result.output}" actual_deposit = json.loads(result.output) assert actual_deposit == { "deposit_id": str(deposit_id), "deposit_status": "partial", "deposit_status_detail": None, "deposit_date": "Oct. 
8, 2020, 4:57 p.m.", } # Update the partial deposit with only 1 archive # fmt: off result = cli_runner.invoke( cli, [ "upload", "--url", api_url, "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--archive", sample_archive["path"], "--deposit-id", deposit_id, "--slug", slug, "--format", "json", "--partial", # in-progress: True, because remains the metadata to upload ], ) # fmt: on assert result.exit_code == 0, f"unexpected output: {result.output}" assert result.output is not None actual_deposit = json.loads(result.output) # deposit update scenario actually returns a deposit status dict assert actual_deposit["deposit_id"] == str(deposit_id) assert actual_deposit["deposit_status"] == "partial" # Update the partial deposit with only some metadata (and then finalize it) # https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#add-content-or-metadata-to-the-deposit metadata_path = os.path.join(datadir, "atom", "entry-data-deposit-binary.xml") # Update deposit with metadata # fmt: off result = cli_runner.invoke( cli, [ "upload", "--url", api_url, "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--metadata", metadata_path, "--deposit-id", deposit_id, "--slug", slug, "--format", "json", ], # this time, ^ we no longer flag it to partial, so the status changes to # in-progress false ) # fmt: on assert result.exit_code == 0, f"unexpected output: {result.output}" assert result.output is not None actual_deposit = json.loads(result.output) # deposit update scenario actually returns a deposit status dict assert actual_deposit["deposit_id"] == str(deposit_id) # FIXME: should be "deposited" but current limitation in the # requests_mock_datadir_visits use, cannot find a way to make it work right now assert actual_deposit["deposit_status"] == "partial" @pytest.mark.parametrize( - "output_format,callable_fn", + "output_format,parser_fn", [ ("json", json.loads), ("yaml", yaml.safe_load), ( "logging", ast.literal_eval, 
), # not enough though, the caplog fixture is needed ], ) def test_cli_deposit_status_with_output_format( - output_format, callable_fn, datadir, slug, requests_mock_datadir, caplog, cli_runner + output_format, parser_fn, datadir, slug, requests_mock_datadir, caplog, cli_runner ): """Check deposit status cli with all possible output formats (json, yaml, logging). """ api_url_basename = "deposit.test.status" deposit_id = 1033 expected_deposit_status = { "deposit_id": str(deposit_id), "deposit_status": "done", "deposit_status_detail": ( "The deposit has been successfully loaded into the " "Software Heritage archive" ), "deposit_swh_id": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea", "deposit_swh_id_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/", # noqa "deposit_external_id": "check-deposit-2020-10-08T13:52:34.509655", } # fmt: off result = cli_runner.invoke( cli, [ "status", "--url", f"https://{api_url_basename}/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--deposit-id", deposit_id, "--format", output_format, ], ) # fmt: on assert result.exit_code == 0, f"unexpected output: {result.output}" if output_format == "logging": assert len(caplog.record_tuples) == 1 # format: (, , ) _, _, result_output = caplog.record_tuples[0] else: result_output = result.output - actual_deposit = callable_fn(result_output) + actual_deposit = parser_fn(result_output) assert actual_deposit == expected_deposit_status def test_cli_update_metadata_with_swhid_on_completed_deposit( datadir, requests_mock_datadir, cli_runner ): """Update new metadata on a completed deposit (status done) is ok """ api_url_basename = "deposit.test.updateswhid" deposit_id = 123 expected_deposit_status = { "deposit_external_id": 
"check-deposit-2020-10-08T13:52:34.509655", "deposit_id": str(deposit_id), "deposit_status": "done", "deposit_status_detail": ( "The deposit has been successfully loaded into the " "Software Heritage archive" ), "deposit_swh_id": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea", "deposit_swh_id_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/", # noqa } assert expected_deposit_status["deposit_status"] == "done" assert expected_deposit_status["deposit_swh_id"] is not None # fmt: off result = cli_runner.invoke( cli, [ "upload", "--url", f"https://{api_url_basename}/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--name", "test-project", "--author", "John Doe", "--deposit-id", deposit_id, "--swhid", expected_deposit_status["deposit_swh_id"], "--format", "json", ], ) # fmt: on assert result.exit_code == 0, result.output actual_deposit_status = json.loads(result.output) assert "error" not in actual_deposit_status assert actual_deposit_status == expected_deposit_status def test_cli_update_metadata_with_swhid_on_other_status_deposit( datadir, requests_mock_datadir, cli_runner ): """Update new metadata with swhid on other deposit status is not possible """ api_url_basename = "deposit.test.updateswhid" deposit_id = 321 # fmt: off result = cli_runner.invoke( cli, [ "upload", "--url", f"https://{api_url_basename}/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--name", "test-project", "--author", "John Doe", "--deposit-id", deposit_id, "--swhid", "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea", "--format", "json", ], ) # fmt: on assert result.exit_code == 0, result.output actual_result = json.loads(result.output) assert "error" in actual_result assert actual_result == { "error": "You can only 
update metadata on deposit with status 'done'", "detail": "The deposit 321 has status 'partial'", "deposit_status": "partial", "deposit_id": 321, } def test_cli_metadata_only_deposit_full_metadata_file( datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path, ): """Post metadata-only deposit through cli The metadata file posted by the client already contains the swhid """ api_url_basename = "deposit.test.metadataonly" swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea" metadata = atom_dataset["entry-data-with-swhid"].format(swhid=swhid) metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml") with open(metadata_path, "w") as m: m.write(metadata) expected_deposit_status = { "deposit_id": "100", "deposit_status": "done", "deposit_date": "2020-10-08T13:52:34.509655", } assert expected_deposit_status["deposit_status"] == "done" # fmt: off result = cli_runner.invoke( cli, [ "metadata-only", "--url", f"https://{api_url_basename}/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--metadata", metadata_path, "--format", "json", ], ) # fmt: on assert result.exit_code == 0, result.output actual_deposit_status = json.loads(result.output) assert "error" not in actual_deposit_status assert actual_deposit_status == expected_deposit_status def test_cli_metadata_only_deposit_invalid_swhid( datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path, ): """Post metadata-only deposit through cli with invalid swhid raises """ api_url_basename = "deposit.test.metadataonly" invalid_swhid = "ssh:2:sth:xxx" metadata = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid) metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml") with open(metadata_path, "w") as f: f.write(metadata) with pytest.raises(ValidationError, match="Invalid"): # fmt: off cli_runner.invoke( cli, [ "metadata-only", "--url", f"https://{api_url_basename}/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], 
"--metadata", metadata_path, "--format", "json", ], catch_exceptions=False, ) # fmt: on def test_cli_metadata_only_deposit_no_swhid( datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path, ): """Post metadata-only deposit through cli with invalid swhid raises """ api_url_basename = "deposit.test.metadataonly" metadata = atom_dataset["entry-data-minimal"] metadata_path = os.path.join(tmp_path, "entry-data-minimal.xml") with open(metadata_path, "w") as f: f.write(metadata) with pytest.raises(InputError, match="SWHID must be provided"): # fmt: off cli_runner.invoke( cli, [ "metadata-only", "--url", f"https://{api_url_basename}/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--metadata", metadata_path, "--format", "json", ], catch_exceptions=False, ) # fmt: on @pytest.mark.parametrize( "metadata_entry_key", ["entry-data-with-add-to-origin", "entry-only-create-origin"] ) def test_cli_deposit_warning_missing_origin( sample_archive, metadata_entry_key, tmp_path, atom_dataset, caplog, cli_runner, requests_mock_datadir, ): """Deposit cli should log warning when the provided metadata xml is missing origins """ # For the next deposit, no warning should be logged as either or # are provided metadata_raw = atom_dataset[metadata_entry_key] % "some-url" metadata_path = os.path.join(tmp_path, "metadata-with-origin-tag-to-deposit.xml") with open(metadata_path, "w") as f: f.write(metadata_raw) # fmt: off cli_runner.invoke( cli, [ "upload", "--url", "https://deposit.swh.test/1", "--username", TEST_USER["username"], "--password", TEST_USER["password"], "--metadata", metadata_path, ], ) # fmt: on for (_, log_level, _) in caplog.record_tuples: # all messages are info or below messages so everything is fine assert log_level < logging.WARNING def test_cli_failure_should_be_parseable(atom_dataset, mocker): summary = "Cannot load metadata" verbose_description = ( "Cannot load metadata on swh:1:dir:0eda267e7d3c2e37b3f6a78e542b16190ac4574e, " "this 
directory object does not exist in the archive (yet?)." ) error_xml = atom_dataset["error-cli"].format( summary=summary, verboseDescription=verbose_description ) api_call = BaseDepositClient(url="https://somewhere.org/") actual_error = api_call.parse_result_error(error_xml) assert actual_error == { "summary": summary, "detail": "", "sword:verboseDescription": verbose_description, } def test_cli_service_document_failure(atom_dataset, mocker): """Ensure service document failures are properly served """ summary = "Invalid user credentials" error_xml = atom_dataset["error-cli"].format(summary=summary, verboseDescription="") api_call = ServiceDocumentDepositClient(url="https://somewhere.org/") actual_error = api_call.parse_result_error(error_xml) assert actual_error == {"error": summary} + + +@pytest.mark.parametrize( + "output_format,parser_fn", + [ + ("json", json.loads), + ("yaml", yaml.safe_load), + ( + "logging", + ast.literal_eval, + ), # not enough though, the caplog fixture is needed + ], +) +def test_cli_deposit_collection_list( + output_format, parser_fn, datadir, slug, requests_mock_datadir, caplog, cli_runner +): + """Check deposit status cli with all possible output formats (json, yaml, logging). 
+ + """ + api_url_basename = "deposit.test.list" + + expected_deposits = { + "count": "3", + "deposits": [ + { + "external_id": "check-deposit-2020-10-09T13:10:00.000000", + "id": "1031", + "status": "rejected", + "status_detail": "Deposit without archive", + }, + { + "external_id": "check-deposit-2020-10-10T13:20:00.000000", + "id": "1032", + "status": "rejected", + "status_detail": "Deposit without archive", + }, + { + "complete_date": "2020-10-08T13:52:34.509655", + "external_id": "check-deposit-2020-10-08T13:52:34.509655", + "id": "1033", + "reception_date": "2020-10-08T13:50:30", + "status": "done", + "status_detail": "The deposit has been successfully loaded into " + "the Software Heritage archive", + "swhid": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea", + "swhid_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/", # noqa + }, + ], + } + + # fmt: off + result = cli_runner.invoke( + cli, + [ + "list", + "--url", f"https://{api_url_basename}/1", + "--username", TEST_USER["username"], + "--password", TEST_USER["password"], + "--page", 1, + "--page-size", 10, + "--format", output_format, + ], + ) + # fmt: on + assert result.exit_code == 0, f"unexpected output: {result.output}" + + if output_format == "logging": + assert len(caplog.record_tuples) == 1 + # format: (, , ) + _, _, result_output = caplog.record_tuples[0] + else: + result_output = result.output + + actual_deposit = parser_fn(result_output) + assert actual_deposit == expected_deposits diff --git a/swh/deposit/tests/data/atom/entry-list-deposits-page1.xml b/swh/deposit/tests/data/atom/entry-list-deposits-page1.xml new file mode 100644 index 00000000..d094aab2 --- /dev/null +++ b/swh/deposit/tests/data/atom/entry-list-deposits-page1.xml @@ -0,0 +1,18 @@ + + 3 + + 1031 + rejected + 
Deposit without archive + check-deposit-2020-10-09T13:10:00.000000 + + + 1032 + rejected + Deposit without archive + check-deposit-2020-10-10T13:20:00.000000 + + diff --git a/swh/deposit/tests/data/atom/entry-list-deposits-page2.xml b/swh/deposit/tests/data/atom/entry-list-deposits-page2.xml new file mode 100644 index 00000000..eba33ad8 --- /dev/null +++ b/swh/deposit/tests/data/atom/entry-list-deposits-page2.xml @@ -0,0 +1,16 @@ + + 3 + + 1033 + 2020-10-08T13:50:30 + 2020-10-08T13:52:34.509655 + done + The deposit has been successfully loaded into the Software Heritage archive + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/ + check-deposit-2020-10-08T13:52:34.509655 + + diff --git a/swh/deposit/tests/data/atom/entry-list-deposits.xml b/swh/deposit/tests/data/atom/entry-list-deposits.xml new file mode 100644 index 00000000..e830bd33 --- /dev/null +++ b/swh/deposit/tests/data/atom/entry-list-deposits.xml @@ -0,0 +1,28 @@ + + 3 + + 1031 + rejected + Deposit without archive + check-deposit-2020-10-09T13:10:00.000000 + + + 1032 + rejected + Deposit without archive + check-deposit-2020-10-10T13:20:00.000000 + + + 1033 + 2020-10-08T13:50:30 + 2020-10-08T13:52:34.509655 + done + The deposit has been successfully loaded into the Software Heritage archive + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/ + check-deposit-2020-10-08T13:52:34.509655 + + diff --git a/swh/deposit/tests/data/https_deposit.test.list/1_servicedocument 
b/swh/deposit/tests/data/https_deposit.test.list/1_servicedocument new file mode 100644 index 00000000..3abadf1a --- /dev/null +++ b/swh/deposit/tests/data/https_deposit.test.list/1_servicedocument @@ -0,0 +1,26 @@ + + + + 2.0 + 209715200 + + + The Software Heritage (SWH) Archive + + test Software Collection + application/zip + application/x-tar + Collection Policy + Software Heritage Archive + Collect, Preserve, Share + false + false + http://purl.org/net/sword/package/SimpleZip + https://deposit.test.status/1/test/ + test + + + diff --git a/swh/deposit/tests/data/https_deposit.test.list/1_test,page=1,page_size=10 b/swh/deposit/tests/data/https_deposit.test.list/1_test,page=1,page_size=10 new file mode 100644 index 00000000..e830bd33 --- /dev/null +++ b/swh/deposit/tests/data/https_deposit.test.list/1_test,page=1,page_size=10 @@ -0,0 +1,28 @@ + + 3 + + 1031 + rejected + Deposit without archive + check-deposit-2020-10-09T13:10:00.000000 + + + 1032 + rejected + Deposit without archive + check-deposit-2020-10-10T13:20:00.000000 + + + 1033 + 2020-10-08T13:50:30 + 2020-10-08T13:52:34.509655 + done + The deposit has been successfully loaded into the Software Heritage archive + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/ + check-deposit-2020-10-08T13:52:34.509655 + + diff --git a/swh/deposit/tests/test_client_module.py b/swh/deposit/tests/test_client_module.py new file mode 100644 index 00000000..03e64c1c --- /dev/null +++ b/swh/deposit/tests/test_client_module.py @@ -0,0 +1,215 @@ +# Copyright (C) 2021 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for 
more information + +# Ensure the gist of the BaseDepositClient.execute works as expected in corner cases The +# following tests uses the ServiceDocumentDepositClient and StatusDepositClient because +# they are BaseDepositClient subclasses. We could have used other classes but those ones +# got elected as they are fairly simple ones. + +import pytest + +from swh.deposit.client import ( + CollectionListDepositClient, + MaintenanceError, + PublicApiDepositClient, + ServiceDocumentDepositClient, + StatusDepositClient, +) +from swh.deposit.utils import to_header_link + + +def test_client_read_data_ok(requests_mock_datadir): + client = ServiceDocumentDepositClient( + url="https://deposit.swh.test/1", auth=("test", "test") + ) + + result = client.execute() + + assert isinstance(result, dict) + + collection = result["app:service"]["app:workspace"]["app:collection"] + assert collection["sword:name"] == "test" + + +def test_client_read_data_fails(mocker): + mock = mocker.patch("swh.deposit.client.BaseDepositClient.do_execute") + mock.side_effect = ValueError("here comes trouble") + + client = ServiceDocumentDepositClient( + url="https://deposit.swh.test/1", auth=("test", "test") + ) + + result = client.execute() + assert isinstance(result, dict) + assert "error" in result + assert mock.called + + +def test_client_read_data_no_result(requests_mock): + url = "https://deposit.swh.test/1" + requests_mock.get(f"{url}/servicedocument/", status_code=204) + + client = ServiceDocumentDepositClient( + url="https://deposit.swh.test/1", auth=("test", "test") + ) + + result = client.execute() + assert isinstance(result, dict) + assert result == {"status": 204} + + +def test_client_read_data_collection_error_503(requests_mock, atom_dataset): + error_content = atom_dataset["error-cli"].format( + summary="forbidden", verboseDescription="Access restricted", + ) + url = "https://deposit.swh.test/1" + requests_mock.get(f"{url}/servicedocument/", status_code=503, text=error_content) + + client 
= ServiceDocumentDepositClient( + url="https://deposit.swh.test/1", auth=("test", "test") + ) + + result = client.execute() + assert isinstance(result, dict) + assert result == { + "error": "forbidden", + "status": 503, + "collection": None, + } + + +def test_client_read_data_status_error_503(requests_mock, atom_dataset): + error_content = atom_dataset["error-cli"].format( + summary="forbidden", verboseDescription="Access restricted", + ) + collection = "test" + deposit_id = 1 + url = "https://deposit.swh.test/1" + requests_mock.get( + f"{url}/{collection}/{deposit_id}/status/", status_code=503, text=error_content + ) + + client = StatusDepositClient( + url="https://deposit.swh.test/1", auth=("test", "test") + ) + + with pytest.raises(MaintenanceError, match="forbidden"): + client.execute(collection, deposit_id) + + +EXPECTED_DEPOSIT = { + "id": "1031", + "external_id": "check-deposit-2020-10-09T13:10:00.000000", + "status": "rejected", + "status_detail": "Deposit without archive", +} + +EXPECTED_DEPOSIT2 = { + "id": "1032", + "external_id": "check-deposit-2020-10-10T13:20:00.000000", + "status": "rejected", + "status_detail": "Deposit without archive", +} + +EXPECTED_DEPOSIT3 = { + "id": "1033", + "external_id": "check-deposit-2020-10-08T13:52:34.509655", + "status": "done", + "status_detail": ( + "The deposit has been successfully loaded into the Software " "Heritage archive" + ), + "reception_date": "2020-10-08T13:50:30", + "complete_date": "2020-10-08T13:52:34.509655", + "swhid": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea", + "swhid_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/", # noqa +} + + +def test_client_collection_list(requests_mock, atom_dataset): + collection_list_xml = atom_dataset["entry-list-deposits"] + base_url = 
"https://deposit.test.list/1" + collection = "test" + url = f"{base_url}/{collection}/" + requests_mock.get(url, status_code=200, text=collection_list_xml) + expected_result = { + "count": "3", + "deposits": [EXPECTED_DEPOSIT, EXPECTED_DEPOSIT2, EXPECTED_DEPOSIT3], + } + + # use dedicated client + client = CollectionListDepositClient(url=base_url, auth=("test", "test")) + + # no pagination + result = client.execute(collection) + + assert result == expected_result + + # The main public client should work the same way + client2 = PublicApiDepositClient(url=base_url, auth=("test", "test")) + result2 = client2.deposit_list(collection) + + assert result2 == expected_result + + assert requests_mock.called + request_history = [m.url for m in requests_mock.request_history] + assert request_history == [url] * 2 + + +def test_client_collection_list_with_pagination_headers(requests_mock, atom_dataset): + collection_list_xml_page1 = atom_dataset["entry-list-deposits-page1"] + collection_list_xml_page2 = atom_dataset["entry-list-deposits-page2"] + base_url = "https://deposit.test.list/1" + collection = "test" + url = f"{base_url}/{collection}/" + page1 = 1 + page2 = 2 + page_size = 10 + url_page1 = f"{url}?page={page1}" + url_page2 = f"{url}?page={page2}&page_size={page_size}" + requests_mock.get( + url_page1, + status_code=200, + text=collection_list_xml_page1, + headers={"Link": to_header_link(url_page2, "next"),}, + ) + requests_mock.get( + url_page2, + status_code=200, + text=collection_list_xml_page2, + headers={"Link": to_header_link(url_page1, "previous"),}, + ) + + expected_result_page1 = { + "count": "3", + "deposits": [EXPECTED_DEPOSIT, EXPECTED_DEPOSIT2], + "next": url_page2, + } + expected_result_page2 = { + "count": "3", + "deposits": [EXPECTED_DEPOSIT3], + "previous": url_page1, + } + + client = CollectionListDepositClient( + url="https://deposit.test.list/1", auth=("test", "test") + ) + client2 = PublicApiDepositClient(url=base_url, auth=("test", "test")) + + 
result = client.execute(collection, page=page1) + assert result == expected_result_page1 + + result2 = client.execute(collection, page=page2, page_size=page_size) + assert result2 == expected_result_page2 + + # The main public client should work the same way + result = client2.deposit_list(collection, page=page1) + assert result == expected_result_page1 + + result2 = client2.deposit_list(collection, page=page2, page_size=page_size) + assert result2 == expected_result_page2 + + assert requests_mock.called + request_history = [m.url for m in requests_mock.request_history] + assert request_history == [url_page1, url_page2] * 2 diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py index 0bb94c86..2e01de8c 100644 --- a/swh/deposit/utils.py +++ b/swh/deposit/utils.py @@ -1,240 +1,253 @@ # Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging from types import GeneratorType from typing import Any, Dict, Optional, Union import iso8601 import xmltodict from swh.model.exceptions import ValidationError from swh.model.identifiers import ( ExtendedSWHID, ObjectType, QualifiedSWHID, normalize_timestamp, ) logger = logging.getLogger(__name__) def parse_xml(stream, encoding="utf-8"): namespaces = { "http://www.w3.org/2005/Atom": "atom", "http://www.w3.org/2007/app": "app", "http://purl.org/dc/terms/": "dc", "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0": "codemeta", "http://purl.org/net/sword/terms/": "sword", "https://www.softwareheritage.org/schema/2018/deposit": "swh", } data = xmltodict.parse( stream, encoding=encoding, namespaces=namespaces, process_namespaces=True, dict_constructor=dict, ) if "atom:entry" in data: data = data["atom:entry"] return data def merge(*dicts): """Given an iterator of dicts, merge them losing no information. 
Args: *dicts: arguments are all supposed to be dict to merge into one Returns: dict merged without losing information """ def _extend(existing_val, value): """Given an existing value and a value (as potential lists), merge them together without repetition. """ if isinstance(value, (list, map, GeneratorType)): vals = value else: vals = [value] for v in vals: if v in existing_val: continue existing_val.append(v) return existing_val d = {} for data in dicts: if not isinstance(data, dict): raise ValueError("dicts is supposed to be a variable arguments of dict") for key, value in data.items(): existing_val = d.get(key) if not existing_val: d[key] = value continue if isinstance(existing_val, (list, map, GeneratorType)): new_val = _extend(existing_val, value) elif isinstance(existing_val, dict): if isinstance(value, dict): new_val = merge(existing_val, value) else: new_val = _extend([existing_val], value) else: new_val = _extend([existing_val], value) d[key] = new_val return d def normalize_date(date): """Normalize date fields as expected by swh workers. If date is a list, elect arbitrarily the first element of that list If date is (then) a string, parse it through dateutil.parser.parse to extract a datetime. Then normalize it through swh.model.identifiers.normalize_timestamp. Returns The swh date object """ if isinstance(date, list): date = date[0] if isinstance(date, str): date = iso8601.parse_date(date) return normalize_timestamp(date) def compute_metadata_context(swhid_reference: QualifiedSWHID) -> Dict[str, Any]: """Given a SWHID object, determine the context as a dict. 
""" metadata_context: Dict[str, Any] = {"origin": None} if swhid_reference.qualifiers(): metadata_context = { "origin": swhid_reference.origin, "path": swhid_reference.path, } snapshot = swhid_reference.visit if snapshot: metadata_context["snapshot"] = snapshot anchor = swhid_reference.anchor if anchor: metadata_context[anchor.object_type.name.lower()] = anchor return metadata_context ALLOWED_QUALIFIERS_NODE_TYPE = ( ObjectType.SNAPSHOT, ObjectType.REVISION, ObjectType.RELEASE, ObjectType.DIRECTORY, ) def parse_swh_reference(metadata: Dict,) -> Optional[Union[QualifiedSWHID, str]]: """Parse swh reference within the metadata dict (or origin) reference if found, None otherwise. .. code-block:: xml or: .. code-block:: xml Args: metadata: result of parsing an Atom document with :func:`parse_xml` Raises: ValidationError in case the swhid referenced (if any) is invalid Returns: Either swhid or origin reference if any. None otherwise. """ # noqa swh_deposit = metadata.get("swh:deposit") if not swh_deposit: return None swh_reference = swh_deposit.get("swh:reference") if not swh_reference: return None swh_origin = swh_reference.get("swh:origin") if swh_origin: url = swh_origin.get("@url") if url: return url swh_object = swh_reference.get("swh:object") if not swh_object: return None swhid = swh_object.get("@swhid") if not swhid: return None swhid_reference = QualifiedSWHID.from_string(swhid) if swhid_reference.qualifiers(): anchor = swhid_reference.anchor if anchor: if anchor.object_type not in ALLOWED_QUALIFIERS_NODE_TYPE: error_msg = ( "anchor qualifier should be a core SWHID with type one of " f"{', '.join(t.name.lower() for t in ALLOWED_QUALIFIERS_NODE_TYPE)}" ) raise ValidationError(error_msg) visit = swhid_reference.visit if visit: if visit.object_type != ObjectType.SNAPSHOT: raise ValidationError( f"visit qualifier should be a core SWHID with type snp, " f"not {visit.object_type.value}" ) if ( visit and anchor and visit.object_type == ObjectType.SNAPSHOT and 
anchor.object_type == ObjectType.SNAPSHOT ): logger.warn( "SWHID use of both anchor and visit targeting " f"a snapshot: {swhid_reference}" ) raise ValidationError( "'anchor=swh:1:snp:' is not supported when 'visit' is also provided." ) return swhid_reference def extended_swhid_from_qualified(swhid: QualifiedSWHID) -> ExtendedSWHID: """Used to get the target of a metadata object from a , as the latter uses a QualifiedSWHID.""" return ExtendedSWHID.from_string(str(swhid).split(";")[0]) + + +def to_header_link(link: str, link_name: str) -> str: + """Build a single header link. + + >>> link_next = to_header_link("next-url", "next") + >>> link_next + '; rel="next"' + >>> ','.join([link_next, to_header_link("prev-url", "prev")]) + '; rel="next",; rel="prev"' + + """ + return f'<{link}>; rel="{link_name}"'