diff --git a/PKG-INFO b/PKG-INFO
index 9ae16c24..5e3d8fa8 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,35 +1,35 @@
Metadata-Version: 2.1
Name: swh.deposit
-Version: 0.0.78
+Version: 0.0.79
Summary: Software Heritage Deposit Server
Home-page: https://forge.softwareheritage.org/source/swh-deposit/
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
Project-URL: Funding, https://www.softwareheritage.org/donate
Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit
Description: # swh-deposit
This is [Software Heritage](https://www.softwareheritage.org)'s
[SWORD 2.0](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) Server
implementation, as well as a simple client to upload deposits on the server.
**S.W.O.R.D** (**S**imple **W**eb-Service **O**ffering **R**epository
**D**eposit) is an interoperability standard for digital file deposit.
This implementation will permit interaction between a client (a
repository) and a server (SWH repository) to permit deposits of
software source code archives and associated metadata.
The documentation is at ./docs/README-specification.md
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 5 - Production/Stable
Description-Content-Type: text/markdown
Provides-Extra: testing
Provides-Extra: server
diff --git a/requirements-test.txt b/requirements-test.txt
index b5e4fdab..2e64c384 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,6 +1,7 @@
pytest
pytest-django
+pytest-mock
swh.scheduler[testing]
pytest-postgresql >= 2.1.0
requests_mock
-django-stubs
+django-stubs < 1.3.0
diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO
index 9ae16c24..5e3d8fa8 100644
--- a/swh.deposit.egg-info/PKG-INFO
+++ b/swh.deposit.egg-info/PKG-INFO
@@ -1,35 +1,35 @@
Metadata-Version: 2.1
Name: swh.deposit
-Version: 0.0.78
+Version: 0.0.79
Summary: Software Heritage Deposit Server
Home-page: https://forge.softwareheritage.org/source/swh-deposit/
Author: Software Heritage developers
Author-email: swh-devel@inria.fr
License: UNKNOWN
Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
Project-URL: Funding, https://www.softwareheritage.org/donate
Project-URL: Source, https://forge.softwareheritage.org/source/swh-deposit
Description: # swh-deposit
This is [Software Heritage](https://www.softwareheritage.org)'s
[SWORD 2.0](http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html) Server
implementation, as well as a simple client to upload deposits on the server.
**S.W.O.R.D** (**S**imple **W**eb-Service **O**ffering **R**epository
**D**eposit) is an interoperability standard for digital file deposit.
This implementation will permit interaction between a client (a
repository) and a server (SWH repository) to permit deposits of
software source code archives and associated metadata.
The documentation is at ./docs/README-specification.md
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: OS Independent
Classifier: Development Status :: 5 - Production/Stable
Description-Content-Type: text/markdown
Provides-Extra: testing
Provides-Extra: server
diff --git a/swh.deposit.egg-info/SOURCES.txt b/swh.deposit.egg-info/SOURCES.txt
index 5ec1f4a2..27c9c6c4 100644
--- a/swh.deposit.egg-info/SOURCES.txt
+++ b/swh.deposit.egg-info/SOURCES.txt
@@ -1,153 +1,169 @@
MANIFEST.in
Makefile
README.md
requirements-server.txt
requirements-swh-server.txt
requirements-swh.txt
requirements-test.txt
requirements.txt
setup.py
version.txt
swh/__init__.py
swh.deposit.egg-info/PKG-INFO
swh.deposit.egg-info/SOURCES.txt
swh.deposit.egg-info/dependency_links.txt
swh.deposit.egg-info/entry_points.txt
swh.deposit.egg-info/requires.txt
swh.deposit.egg-info/top_level.txt
swh/deposit/__init__.py
swh/deposit/apps.py
swh/deposit/auth.py
swh/deposit/config.py
swh/deposit/errors.py
+swh/deposit/gunicorn_config.py
swh/deposit/manage.py
swh/deposit/models.py
swh/deposit/parsers.py
swh/deposit/py.typed
swh/deposit/signals.py
swh/deposit/urls.py
swh/deposit/utils.py
swh/deposit/api/__init__.py
swh/deposit/api/common.py
swh/deposit/api/converters.py
swh/deposit/api/deposit.py
swh/deposit/api/deposit_content.py
swh/deposit/api/deposit_status.py
swh/deposit/api/deposit_update.py
swh/deposit/api/service_document.py
swh/deposit/api/urls.py
swh/deposit/api/private/__init__.py
swh/deposit/api/private/deposit_check.py
swh/deposit/api/private/deposit_list.py
swh/deposit/api/private/deposit_read.py
swh/deposit/api/private/deposit_update_status.py
swh/deposit/api/private/urls.py
swh/deposit/cli/__init__.py
swh/deposit/cli/admin.py
swh/deposit/cli/client.py
swh/deposit/client/__init__.py
swh/deposit/fixtures/__init__.py
swh/deposit/fixtures/deposit_data.yaml
swh/deposit/loader/__init__.py
swh/deposit/loader/checker.py
-swh/deposit/loader/loader.py
swh/deposit/loader/tasks.py
swh/deposit/migrations/0001_initial.py
swh/deposit/migrations/0002_depositrequest_archive.py
swh/deposit/migrations/0003_temporaryarchive.py
swh/deposit/migrations/0004_delete_temporaryarchive.py
swh/deposit/migrations/0005_auto_20171019_1436.py
swh/deposit/migrations/0006_depositclient_url.py
swh/deposit/migrations/0007_auto_20171129_1609.py
swh/deposit/migrations/0008_auto_20171130_1513.py
swh/deposit/migrations/0009_deposit_parent.py
swh/deposit/migrations/0010_auto_20180110_0953.py
swh/deposit/migrations/0011_auto_20180115_1510.py
swh/deposit/migrations/0012_deposit_status_detail.py
swh/deposit/migrations/0013_depositrequest_raw_metadata.py
swh/deposit/migrations/0014_auto_20180720_1221.py
swh/deposit/migrations/0015_depositrequest_typemigration.py
swh/deposit/migrations/0016_auto_20190507_1408.py
swh/deposit/migrations/0017_auto_20190925_0906.py
swh/deposit/migrations/__init__.py
swh/deposit/settings/__init__.py
swh/deposit/settings/common.py
swh/deposit/settings/development.py
swh/deposit/settings/production.py
swh/deposit/settings/testing.py
swh/deposit/static/robots.txt
swh/deposit/static/css/bootstrap-responsive.min.css
swh/deposit/static/css/style.css
swh/deposit/static/img/arrow-up-small.png
swh/deposit/static/img/swh-logo-deposit.png
swh/deposit/static/img/swh-logo-deposit.svg
swh/deposit/static/img/icons/swh-logo-32x32.png
swh/deposit/static/img/icons/swh-logo-deposit-180x180.png
swh/deposit/static/img/icons/swh-logo-deposit-192x192.png
swh/deposit/static/img/icons/swh-logo-deposit-270x270.png
swh/deposit/templates/__init__.py
+swh/deposit/templates/api.html
swh/deposit/templates/homepage.html
swh/deposit/templates/layout.html
swh/deposit/templates/deposit/__init__.py
swh/deposit/templates/deposit/content.xml
swh/deposit/templates/deposit/deposit_receipt.xml
swh/deposit/templates/deposit/error.xml
swh/deposit/templates/deposit/service_document.xml
swh/deposit/templates/deposit/status.xml
swh/deposit/templates/rest_framework/api.html
swh/deposit/tests/__init__.py
swh/deposit/tests/common.py
swh/deposit/tests/conftest.py
swh/deposit/tests/test_common.py
+swh/deposit/tests/test_gunicorn_config.py
swh/deposit/tests/test_utils.py
swh/deposit/tests/api/__init__.py
swh/deposit/tests/api/conftest.py
swh/deposit/tests/api/test_converters.py
swh/deposit/tests/api/test_deposit.py
swh/deposit/tests/api/test_deposit_atom.py
swh/deposit/tests/api/test_deposit_binary.py
swh/deposit/tests/api/test_deposit_delete.py
swh/deposit/tests/api/test_deposit_list.py
swh/deposit/tests/api/test_deposit_multipart.py
swh/deposit/tests/api/test_deposit_private_check.py
swh/deposit/tests/api/test_deposit_private_read_archive.py
swh/deposit/tests/api/test_deposit_private_read_metadata.py
swh/deposit/tests/api/test_deposit_private_update_status.py
swh/deposit/tests/api/test_deposit_status.py
swh/deposit/tests/api/test_deposit_update.py
swh/deposit/tests/api/test_parser.py
swh/deposit/tests/api/test_service_document.py
swh/deposit/tests/api/data/atom/codemeta-sample.xml
swh/deposit/tests/api/data/atom/entry-data-badly-formatted.xml
swh/deposit/tests/api/data/atom/entry-data-deposit-binary.xml
swh/deposit/tests/api/data/atom/entry-data-empty-body.xml
swh/deposit/tests/api/data/atom/entry-data-ko.xml
swh/deposit/tests/api/data/atom/entry-data-minimal.xml
swh/deposit/tests/api/data/atom/entry-data-parsing-error-prone.xml
swh/deposit/tests/api/data/atom/entry-data0.xml
swh/deposit/tests/api/data/atom/entry-data1.xml
swh/deposit/tests/api/data/atom/entry-data2.xml
swh/deposit/tests/api/data/atom/entry-data3.xml
swh/deposit/tests/api/data/atom/entry-update-in-place.xml
swh/deposit/tests/api/data/atom/error-with-decimal.xml
swh/deposit/tests/api/data/atom/metadata.xml
swh/deposit/tests/api/data/atom/tei-sample.xml
swh/deposit/tests/cli/__init__.py
swh/deposit/tests/cli/test_client.py
+swh/deposit/tests/cli/data/atom/codemeta-sample.xml
+swh/deposit/tests/cli/data/atom/entry-data-badly-formatted.xml
+swh/deposit/tests/cli/data/atom/entry-data-deposit-binary.xml
+swh/deposit/tests/cli/data/atom/entry-data-empty-body.xml
+swh/deposit/tests/cli/data/atom/entry-data-ko.xml
+swh/deposit/tests/cli/data/atom/entry-data-minimal.xml
+swh/deposit/tests/cli/data/atom/entry-data-parsing-error-prone.xml
+swh/deposit/tests/cli/data/atom/entry-data0.xml
+swh/deposit/tests/cli/data/atom/entry-data1.xml
+swh/deposit/tests/cli/data/atom/entry-data2.xml
+swh/deposit/tests/cli/data/atom/entry-data3.xml
+swh/deposit/tests/cli/data/atom/entry-update-in-place.xml
+swh/deposit/tests/cli/data/atom/error-with-decimal.xml
+swh/deposit/tests/cli/data/atom/metadata.xml
+swh/deposit/tests/cli/data/atom/tei-sample.xml
swh/deposit/tests/loader/__init__.py
swh/deposit/tests/loader/common.py
swh/deposit/tests/loader/conftest.py
swh/deposit/tests/loader/test_checker.py
swh/deposit/tests/loader/test_client.py
-swh/deposit/tests/loader/test_loader.py
swh/deposit/tests/loader/test_tasks.py
swh/deposit/tests/loader/data/http_example.org/hello.json
swh/deposit/tests/loader/data/http_example.org/hello_you
swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_1_check
swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_2_check
swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_meta
swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_raw
swh/deposit/tests/loader/data/https_deposit.softwareheritage.org/1_private_test_999_update
swh/deposit/tests/loader/data/https_nowhere.org/1_private_test_1_check
swh/deposit/tests/loader/data/https_nowhere.org/1_private_test_1_metadata
swh/deposit/tests/loader/data/https_nowhere.org/1_private_test_1_raw
\ No newline at end of file
diff --git a/swh.deposit.egg-info/requires.txt b/swh.deposit.egg-info/requires.txt
index c6857e04..79807d04 100644
--- a/swh.deposit.egg-info/requires.txt
+++ b/swh.deposit.egg-info/requires.txt
@@ -1,30 +1,31 @@
vcversioner
click
xmltodict
iso8601
requests
swh.core>=0.0.75
[server]
Django<2.0
djangorestframework
swh.core[http]
swh.loader.tar>=0.0.39
swh.loader.core>=0.0.43
swh.scheduler>=0.0.39
swh.model>=0.0.26
[testing]
pytest
pytest-django
+pytest-mock
swh.scheduler[testing]
pytest-postgresql>=2.1.0
requests_mock
-django-stubs
+django-stubs<1.3.0
Django<2.0
djangorestframework
swh.core[http]
swh.loader.tar>=0.0.39
swh.loader.core>=0.0.43
swh.scheduler>=0.0.39
swh.model>=0.0.26
diff --git a/swh/deposit/api/urls.py b/swh/deposit/api/urls.py
index cc516092..b65f0be0 100644
--- a/swh/deposit/api/urls.py
+++ b/swh/deposit/api/urls.py
@@ -1,57 +1,64 @@
-# Copyright (C) 2017-2018 The Software Heritage developers
+# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-"""swh URL Configuration
+"""SWH's deposit api URL Configuration
"""
from django.conf.urls import url
+from django.shortcuts import render
from ..config import EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI
from ..config import SD_IRI, COL_IRI, STATE_IRI
from .deposit import SWHDeposit
from .deposit_status import SWHDepositStatus
from .deposit_update import SWHUpdateMetadataDeposit
from .deposit_update import SWHUpdateArchiveDeposit
from .deposit_content import SWHDepositContent
from .service_document import SWHServiceDocument
-urlpatterns = [
- # PUBLIC API
+def api_view(req):
+ return render(req, "api.html")
+
+
+# PUBLIC API
+urlpatterns = [
+ # simple view on the api
+ url(r'^$', api_view, name='api'),
# SD IRI - Service Document IRI
# -> GET
url(r'^servicedocument/', SWHServiceDocument.as_view(),
name=SD_IRI),
# Col IRI - Collection IRI
# -> POST
url(r'^(?P[^/]+)/$', SWHDeposit.as_view(),
name=COL_IRI),
# EM IRI - Atom Edit Media IRI (update archive IRI)
# -> PUT (update-in-place existing archive)
# -> POST (add new archive)
url(r'^(?P[^/]+)/(?P[^/]+)/media/$',
SWHUpdateArchiveDeposit.as_view(),
name=EM_IRI),
# Edit IRI - Atom Entry Edit IRI (update metadata IRI)
# SE IRI - Sword Edit IRI ;; possibly same as Edit IRI
# -> PUT (update in place)
# -> POST (add new metadata)
url(r'^(?P[^/]+)/(?P[^/]+)/metadata/$',
SWHUpdateMetadataDeposit.as_view(),
name=EDIT_SE_IRI),
# State IRI
# -> GET
url(r'^(?P[^/]+)/(?P[^/]+)/status/$',
SWHDepositStatus.as_view(),
name=STATE_IRI),
# Cont/File IRI
# -> GET
url(r'^(?P[^/]+)/(?P[^/]+)/content/$',
SWHDepositContent.as_view(),
name=CONT_FILE_IRI), # specification is not clear about
# FILE-IRI, we assume it's the same as
# the CONT-IRI one
]
diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py
index 2962f636..8cc0f609 100644
--- a/swh/deposit/cli/client.py
+++ b/swh/deposit/cli/client.py
@@ -1,394 +1,381 @@
# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import logging
+import sys
import tempfile
import uuid
import click
import xmltodict
from swh.deposit.client import PublicApiDepositClient
from swh.deposit.cli import deposit
logger = logging.getLogger(__name__)
class InputError(ValueError):
    """Error in the input handed over to the deposit cli script."""
def generate_slug():
    """Return a newly generated random uuid4, as a string, to use as a
    slug (sample purposes).

    """
    slug = uuid.uuid4()
    return str(slug)
def _url(url):
    """Force the /1 api version at the end of the url (avoiding confusing
    issues without it).

    Trailing slashes are dropped first, so an url already ending with
    '/1/' is not suffixed a second time and no '//' is introduced.

    Args:
        url (str): api url used by cli users

    Returns:
        Top level api url to actually request

    """
    # 'https://host/1/' must be recognized as already versioned
    url = url.rstrip('/')
    if not url.endswith('/1'):
        url = '%s/1' % url
    return url
-def generate_metadata_file(name, external_id, authors):
+def generate_metadata_file(name, external_id, authors, temp_dir):
"""Generate a temporary metadata file with the minimum required metadata
This generates a xml file in a temporary location and returns the
path to that file.
This is up to the client of that function to clean up the
temporary file.
Args:
name (str): Software's name
external_id (str): External identifier (slug) or generated one
authors (List[str]): List of author names
Returns:
Filepath to the metadata generated file
"""
- _, tmpfile = tempfile.mkstemp(prefix='swh.deposit.cli.')
-
+ path = os.path.join(temp_dir, 'metadata.xml')
# generate a metadata file with the minimum required metadata
codemetadata = {
'entry': {
'@xmlns': "http://www.w3.org/2005/Atom",
'@xmlns:codemeta': "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
'codemeta:name': name,
'codemeta:identifier': external_id,
'codemeta:author': [{
'codemeta:name': author_name
} for author_name in authors],
},
}
- logging.debug('Temporary file: %s', tmpfile)
+ logging.debug('Temporary file: %s', path)
logging.debug('Metadata dict to generate as xml: %s', codemetadata)
s = xmltodict.unparse(codemetadata, pretty=True)
logging.debug('Metadata dict as xml generated: %s', s)
- with open(tmpfile, 'w') as fp:
+ with open(path, 'w') as fp:
fp.write(s)
- return tmpfile
-
-
-def _cleanup_tempfile(config):
- """Clean up the temporary metadata file generated.
-
- Args:
-
- config (Dict): A configuration dict with 2 important keys for
- that routine, 'cleanup_tempfile' (bool) and 'metadata' (path
- to eventually clean up)
-
- """
- if config['cleanup_tempfile']:
- path = config['metadata']
- if os.path.exists(path):
- os.unlink(path)
+ return path
def _client(url, username, password):
    """Build the public deposit api client for a given server and user.

    Args:
        url (str): Deposit api server
        username (str): User
        password (str): User's password

    Returns:
        PublicApiDepositClient instantiated with that url and credentials

    """
    credentials = {'username': username, 'password': password}
    return PublicApiDepositClient({'url': url, 'auth': credentials})
def _collection(client):
    """Look up the user's collection name in the service document.

    Args:
        client: deposit api client, providing a service_document() method

    Raises:
        InputError: when the service document holds an 'error' entry

    Returns:
        The 'sword:name' of the collection listed in the service document

    """
    service_document = client.service_document()
    if 'error' not in service_document:
        workspace = service_document['service']['workspace']
        return workspace['collection']['sword:name']
    raise InputError(
        'Service document retrieval: %s' % (service_document['error'], ))
def client_command_parse_input(
username, password, archive, metadata,
archive_deposit, metadata_deposit,
collection, slug, partial, deposit_id, replace,
- url, name, authors):
+ url, name, authors, temp_dir):
"""Parse the client subcommand options and make sure the combination
is acceptable*. If not, an InputError exception is raised
explaining the issue.
By acceptable, we mean:
- A multipart deposit (create or update) requires:
- an existing software archive
- an existing metadata file or author(s) and name provided in
params
- A binary deposit (create/update) requires an existing software
archive
- A metadata deposit (create/update) requires an existing metadata
file or author(s) and name provided in params
- A deposit update requires a deposit_id
This will not prevent all failure cases though. The remaining
errors are already dealt with by the underlying api client.
Raises:
InputError explaining the issue
Returns:
dict with the following keys:
'archive': the software archive to deposit
'username': username
'password': associated password
'metadata': the metadata file to deposit
'collection': the username's associated client
'slug': the slug or external id identifying the deposit to make
'partial': if the deposit is partial or not
'client': instantiated class
'url': deposit's server main entry point
'deposit_type': deposit's type (binary, multipart, metadata)
'deposit_id': optional deposit identifier
"""
- cleanup_tempfile = False
-
- try:
- if archive_deposit and metadata_deposit:
- # too many flags use, remove redundant ones (-> multipart deposit)
- archive_deposit = False
- metadata_deposit = False
+ if archive_deposit and metadata_deposit:
+ # too many flags use, remove redundant ones (-> multipart deposit)
+ archive_deposit = False
+ metadata_deposit = False
+
+ if not slug: # generate one as this is mandatory
+ slug = generate_slug()
+
+ if not metadata:
+ if name and authors:
+ metadata = generate_metadata_file(name, slug, authors, temp_dir)
+ elif not archive_deposit and not partial and not deposit_id:
+ # If we meet all the following conditions:
+ # * there is not an archive-only deposit
+ # * it is not part of a multipart deposit (either create/update
+ # or finish)
+ # * it misses either name or authors
+ raise InputError(
+ "Either a metadata file (--metadata) or both --author and "
+ "--name must be provided, unless this is an archive-only "
+ "deposit.")
+ elif name or authors:
+ # If we are generating metadata, then all mandatory metadata
+ # must be present
+ raise InputError(
+ "Either a metadata file (--metadata) or both --author and "
+ "--name must be provided.")
+ else:
+ # TODO: this is a multipart deposit, we might want to check that
+ # metadata are deposited at some point
+ pass
+ elif name or authors:
+ raise InputError(
+ "Using a metadata file (--metadata) is incompatible with "
+ "--author and --name, which are used to generate one.")
- if not slug: # generate one as this is mandatory
- slug = generate_slug()
+ if metadata_deposit:
+ archive = None
- if not metadata and name and authors:
- metadata = generate_metadata_file(name, slug, authors)
- cleanup_tempfile = True
+ if archive_deposit:
+ metadata = None
- if metadata_deposit:
- archive = None
+ if metadata_deposit and not metadata:
+ raise InputError(
+ "Metadata deposit must be provided for metadata "
+ "deposit (either a filepath or --name and --author)")
- if archive_deposit:
- metadata = None
+ if not archive and not metadata and partial:
+ raise InputError(
+ 'Please provide an actionable command. See --help for more '
+ 'information')
- if metadata_deposit and not metadata:
- raise InputError(
- "Metadata deposit must be provided for metadata "
- "deposit (either a filepath or --name and --author)")
+ if replace and not deposit_id:
+ raise InputError(
+ 'To update an existing deposit, you must provide its id')
- if not archive and not metadata:
- raise InputError(
- 'Please provide an actionable command. See --help for more '
- 'information')
+ client = _client(url, username, password)
- if replace and not deposit_id:
- raise InputError(
- 'To update an existing deposit, you must provide its id')
-
- client = _client(url, username, password)
-
- if not collection:
- collection = _collection(client)
+ if not collection:
+ collection = _collection(client)
- return {
- 'archive': archive,
- 'username': username,
- 'password': password,
- 'metadata': metadata,
- 'cleanup_tempfile': cleanup_tempfile,
- 'collection': collection,
- 'slug': slug,
- 'in_progress': partial,
- 'client': client,
- 'url': url,
- 'deposit_id': deposit_id,
- 'replace': replace,
- }
- except Exception: # to be clean, cleanup prior to raise
- _cleanup_tempfile({
- 'cleanup_tempfile': cleanup_tempfile,
- 'metadata': metadata
- })
- raise
+ return {
+ 'archive': archive,
+ 'username': username,
+ 'password': password,
+ 'metadata': metadata,
+ 'collection': collection,
+ 'slug': slug,
+ 'in_progress': partial,
+ 'client': client,
+ 'url': url,
+ 'deposit_id': deposit_id,
+ 'replace': replace,
+ }
def _subdict(d, keys):
    """Return a new dict with only the entries of d whose key is in keys."""
    return {key: d[key] for key in d if key in keys}
def deposit_create(config, logger):
    """Create a deposit through the deposit client held in config.

    Only the 'collection', 'archive', 'metadata', 'slug' and 'in_progress'
    entries of config are passed along (as keyword arguments) to the
    client's deposit_create.

    """
    logger.debug('Create deposit')
    forwarded = _subdict(
        config, ('collection', 'archive', 'metadata', 'slug', 'in_progress'))
    return config['client'].deposit_create(**forwarded)
def deposit_update(config, logger):
    """Update a deposit through the deposit client held in config.

    Only the 'collection', 'deposit_id', 'archive', 'metadata', 'slug',
    'in_progress' and 'replace' entries of config are passed along (as
    keyword arguments) to the client's deposit_update.

    """
    logger.debug('Update deposit')
    forwarded = _subdict(
        config,
        ('collection', 'deposit_id', 'archive', 'metadata',
         'slug', 'in_progress', 'replace'))
    return config['client'].deposit_update(**forwarded)
@deposit.command()
@click.option('--username', required=True,
help="(Mandatory) User's name")
@click.option('--password', required=True,
help="(Mandatory) User's associated password")
@click.option('--archive', type=click.Path(exists=True),
help='(Optional) Software archive to deposit')
@click.option('--metadata', type=click.Path(exists=True),
help="(Optional) Path to xml metadata file. If not provided, this will use a file named .metadata.xml") # noqa
@click.option('--archive-deposit/--no-archive-deposit', default=False,
help='(Optional) Software archive only deposit')
@click.option('--metadata-deposit/--no-metadata-deposit', default=False,
help='(Optional) Metadata only deposit')
@click.option('--collection',
help="(Optional) User's collection. If not provided, this will be fetched.") # noqa
@click.option('--slug',
help="""(Optional) External system information identifier. If not provided, it will be generated""") # noqa
@click.option('--partial/--no-partial', default=False,
help='(Optional) The deposit will be partial, other deposits will have to take place to finalize it.') # noqa
@click.option('--deposit-id', default=None,
help='(Optional) Update an existing partial deposit with its identifier') # noqa
@click.option('--replace/--no-replace', default=False,
help='(Optional) Update by replacing existing metadata to a deposit') # noqa
@click.option('--url', default='https://deposit.softwareheritage.org',
help="(Optional) Deposit server api endpoint. By default, https://deposit.softwareheritage.org/1") # noqa
@click.option('--verbose/--no-verbose', default=False,
help='Verbose mode')
@click.option('--name',
help='Software name')
@click.option('--author', multiple=True,
help='Software author(s), this can be repeated as many times'
' as there are authors')
@click.pass_context
def upload(ctx,
username, password, archive=None, metadata=None,
archive_deposit=False, metadata_deposit=False,
collection=None, slug=None, partial=False, deposit_id=None,
replace=False, url='https://deposit.softwareheritage.org',
verbose=False, name=None, author=None):
"""Software Heritage Public Deposit Client
Create/Update deposit through the command line.
More documentation can be found at
https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html.
"""
url = _url(url)
config = {}
- try:
- logger.debug('Parsing cli options')
- config = client_command_parse_input(
- username, password, archive, metadata, archive_deposit,
- metadata_deposit, collection, slug, partial, deposit_id,
- replace, url, name, author)
- except InputError as e:
- msg = 'Problem during parsing options: %s' % e
- r = {
- 'error': msg,
- }
- logger.info(r)
- return 1
+ with tempfile.TemporaryDirectory() as temp_dir:
+ try:
+ logger.debug('Parsing cli options')
+ config = client_command_parse_input(
+ username, password, archive, metadata, archive_deposit,
+ metadata_deposit, collection, slug, partial, deposit_id,
+ replace, url, name, author, temp_dir)
+ except InputError as e:
+ logger.error('Problem during parsing options: %s', e)
+ sys.exit(1)
- try:
if verbose:
logger.info("Parsed configuration: %s" % (
config, ))
deposit_id = config['deposit_id']
if deposit_id:
r = deposit_update(config, logger)
else:
r = deposit_create(config, logger)
logger.info(r)
- finally:
- _cleanup_tempfile(config)
-
@deposit.command()
@click.option('--url', default='https://deposit.softwareheritage.org',
help="(Optional) Deposit server api endpoint. By default, "
"https://deposit.softwareheritage.org/1")
@click.option('--username', required=True,
help="(Mandatory) User's name")
@click.option('--password', required=True,
help="(Mandatory) User's associated password")
@click.option('--deposit-id', default=None,
required=True,
help="Deposit identifier.")
@click.pass_context
def status(ctx, url, username, password, deposit_id):
"""Deposit's status
"""
url = _url(url)
logger.debug('Status deposit')
try:
client = _client(url, username, password)
collection = _collection(client)
except InputError as e:
- msg = 'Problem during parsing options: %s' % e
- r = {
- 'error': msg,
- }
- logger.info(r)
- return 1
+ logger.error('Problem during parsing options: %s', e)
+ sys.exit(1)
r = client.deposit_status(
collection=collection, deposit_id=deposit_id)
logger.info(r)
diff --git a/swh/deposit/gunicorn_config.py b/swh/deposit/gunicorn_config.py
new file mode 100644
index 00000000..9c9a5b5c
--- /dev/null
+++ b/swh/deposit/gunicorn_config.py
@@ -0,0 +1,14 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sentry_sdk.integrations.django import DjangoIntegration
+
+from swh.core.api.gunicorn_config import * # noqa
+from swh.core.api.gunicorn_config import post_fork as _post_fork
+
+
+def post_fork(server, worker): # type: ignore
+ _post_fork(server, worker,
+ flask=False, sentry_integrations=[DjangoIntegration()])
diff --git a/swh/deposit/loader/loader.py b/swh/deposit/loader/loader.py
deleted file mode 100644
index af12adf0..00000000
--- a/swh/deposit/loader/loader.py
+++ /dev/null
@@ -1,142 +0,0 @@
-# Copyright (C) 2017-2018 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import os
-import tempfile
-
-from swh.model import hashutil
-from swh.loader.tar import loader
-from swh.loader.core.loader import BufferedLoader
-
-from ..client import PrivateApiDepositClient
-
-
-class DepositLoader(loader.LegacyLocalTarLoader):
- """Deposit loader implementation.
-
- This is a subclass of the :class:TarLoader as the main goal of
- this class is to first retrieve the deposit's tarball contents as
- one and its associated metadata. Then provide said tarball to be
- loaded by the TarLoader.
-
- This will:
-
- - retrieves the deposit's archive locally
- - provide the archive to be loaded by the tar loader
- - clean up the temporary location used to retrieve the archive locally
- - update the deposit's status accordingly
-
- """
- CONFIG_BASE_FILENAME = 'loader/deposit'
-
- ADDITIONAL_CONFIG = {
- 'extraction_dir': ('str', '/tmp/swh.deposit.loader/'),
- }
-
- visit_type = 'deposit'
-
- def __init__(self, client=None):
- super().__init__(
- logging_class='swh.deposit.loader.loader.DepositLoader')
- self.deposit_client = client if client else PrivateApiDepositClient()
-
- def load(self, *, archive_url, deposit_meta_url, deposit_update_url):
- return BufferedLoader.load(
- self,
- archive_url=archive_url,
- deposit_meta_url=deposit_meta_url,
- deposit_update_url=deposit_update_url)
-
- def prepare_origin_visit(self, *, deposit_meta_url, **kwargs):
- self.metadata = self.deposit_client.metadata_get(
- deposit_meta_url)
- self.origin = self.metadata['origin']
- self.visit_date = None
-
- def prepare(self, *, archive_url, deposit_meta_url, deposit_update_url):
- """Prepare the loading by first retrieving the deposit's raw archive
- content.
-
- """
- self.deposit_update_url = deposit_update_url
- self.deposit_client.status_update(deposit_update_url, 'loading')
-
- temporary_directory = tempfile.TemporaryDirectory()
- self.temporary_directory = temporary_directory
- archive_path = os.path.join(temporary_directory.name, 'archive.zip')
- archive = self.deposit_client.archive_get(
- archive_url, archive_path)
-
- metadata = self.metadata
- revision = metadata['revision']
- branch_name = metadata['branch_name']
- self.origin_metadata = metadata['origin_metadata']
- self.prepare_metadata()
-
- super().prepare(tar_path=archive,
- origin=self.origin,
- revision=revision,
- branch_name=branch_name)
-
- def store_metadata(self):
- """Storing the origin_metadata during the load processus.
-
- Provider_id and tool_id are resolved during the prepare() method.
-
- """
- visit_date = self.visit_date
- provider_id = self.origin_metadata['provider']['provider_id']
- tool_id = self.origin_metadata['tool']['tool_id']
- metadata = self.origin_metadata['metadata']
- try:
- self.send_origin_metadata(visit_date, provider_id,
- tool_id, metadata)
- except Exception:
- self.log.exception('Problem when storing origin_metadata')
- raise
-
- def post_load(self, success=True):
- """Updating the deposit's status according to its loading status.
-
- If not successful, we update its status to 'failed'.
- Otherwise, we update its status to 'done' and pass along its
- associated revision.
-
- """
- try:
- if not success:
- self.deposit_client.status_update(self.deposit_update_url,
- status='failed')
- return
-
- revisions = self.objects['revision']
- # Retrieve the revision
- [rev_id] = revisions.keys()
- rev = revisions[rev_id]
- if rev_id:
- rev_id = hashutil.hash_to_hex(rev_id)
-
- dir_id = rev['directory']
- if dir_id:
- dir_id = hashutil.hash_to_hex(dir_id)
-
- # update the deposit's status to success with its
- # revision-id and directory-id
- self.deposit_client.status_update(
- self.deposit_update_url,
- status='done',
- revision_id=rev_id,
- directory_id=dir_id,
- origin_url=self.origin['url'])
- except Exception:
- self.log.exception(
- 'Problem when trying to update the deposit\'s status')
-
- def cleanup(self):
- """Clean up temporary directory where we retrieved the tarball.
-
- """
- super().cleanup()
- self.temporary_directory.cleanup()
diff --git a/swh/deposit/loader/tasks.py b/swh/deposit/loader/tasks.py
index b075addb..7bc23248 100644
--- a/swh/deposit/loader/tasks.py
+++ b/swh/deposit/loader/tasks.py
@@ -1,38 +1,18 @@
# Copyright (C) 2015-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from celery import shared_task
-from swh.deposit.loader.loader import DepositLoader
from swh.deposit.loader.checker import DepositChecker
-@shared_task(name=__name__ + '.LoadDepositArchiveTsk')
-def load_deposit(archive_url, deposit_meta_url, deposit_update_url):
- """Deposit archive loading task described by the following steps:
-
- 1. Retrieve tarball from deposit's private api and store
- locally in a temporary directory
- 2. Trigger the loading
- 3. clean up the temporary directory
- 4. Update the deposit's status according to result using the
- deposit's private update status api
-
- """
- loader = DepositLoader()
- return loader.load(
- archive_url=archive_url,
- deposit_meta_url=deposit_meta_url,
- deposit_update_url=deposit_update_url)
-
-
@shared_task(name=__name__ + '.ChecksDepositTsk')
def check_deposit(deposit_check_url):
"""Check a deposit's status
Args: see :func:`DepositChecker.check`.
"""
checker = DepositChecker()
return checker.check(deposit_check_url)
diff --git a/swh/deposit/signals.py b/swh/deposit/signals.py
index d3396de7..fa43da00 100644
--- a/swh/deposit/signals.py
+++ b/swh/deposit/signals.py
@@ -1,119 +1,104 @@
# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Module in charge of defining some uncoupled actions on deposit.
Typically, checking that the archives deposited are ok are not
directly testing in the request/answer to avoid too long
computations.
So this is done in the deposit_on_status_ready_for_check callback.
"""
from swh.deposit import utils
from django.db.models.signals import post_save
from django.dispatch import receiver
from .models import Deposit
from .config import SWHDefaultConfig, DEPOSIT_STATUS_VERIFIED
from .config import DEPOSIT_STATUS_DEPOSITED
def schedule_task(scheduler, task):
"""Schedule the task and return its identifier
Args:
task (dict): Task to schedule
Returns:
The task identifier
"""
tasks = scheduler.create_tasks([task])
if tasks:
created_task = tasks[0]
return created_task['id']
@receiver(post_save, sender=Deposit)
def post_deposit_save(sender, instance, created, raw, using,
update_fields, **kwargs):
"""When a deposit is saved, check for the deposit's status change and
schedule actions accordingly.
When the status passes to deposited, schedule checks.
When the status pass to ready, schedule loading. Otherwise, do
nothing.
Args:
sender (Deposit): The model class
instance (Deposit): The actual instance being saved
created (bool): True if a new record was created
raw (bool): True if the model is saved exactly as presented
(i.e. when loading a fixture). One should not
query/modify other records in the database as the
database might not be in a consistent state yet
using: The database alias being used
update_fields: The set of fields to update as passed to
Model.save(), or None if update_fields wasn’t
passed to save()
"""
default_config = SWHDefaultConfig()
if not default_config.config['checks']:
return
if instance.status not in {DEPOSIT_STATUS_DEPOSITED,
DEPOSIT_STATUS_VERIFIED}:
return
from django.urls import reverse
from swh.scheduler.utils import create_oneshot_task_dict
args = [instance.collection.name, instance.id]
# In the following, we are checking the instance.*task_id are not already
# populated because the `instance.save()` call will also trigger a call to
# that very function.
if (instance.status == DEPOSIT_STATUS_DEPOSITED and
not instance.check_task_id):
# schedule deposit's checks
from swh.deposit.config import PRIVATE_CHECK_DEPOSIT
check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args)
task = create_oneshot_task_dict('check-deposit',
deposit_check_url=check_url)
check_task_id = schedule_task(default_config.scheduler, task)
instance.check_task_id = check_task_id
instance.save()
elif (instance.status == DEPOSIT_STATUS_VERIFIED and
not instance.load_task_id):
- version = default_config.config.get('loader-version', 1)
- # schedule deposit loading
- if int(version) == 1: # default version
- from swh.deposit.config import PRIVATE_GET_RAW_CONTENT
- from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA
- from swh.deposit.config import PRIVATE_PUT_DEPOSIT
- archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
- meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
- update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args)
-
- task = create_oneshot_task_dict('load-deposit',
- archive_url=archive_url,
- deposit_meta_url=meta_url,
- deposit_update_url=update_url)
- else: # new version
- url = utils.origin_url_from(instance)
- task = create_oneshot_task_dict(
- 'load-deposit',
- url=url, deposit_id=instance.id)
+ url = utils.origin_url_from(instance)
+ task = create_oneshot_task_dict(
+ 'load-deposit',
+ url=url, deposit_id=instance.id)
load_task_id = schedule_task(default_config.scheduler, task)
instance.load_task_id = load_task_id
instance.save()
diff --git a/swh/deposit/templates/api.html b/swh/deposit/templates/api.html
new file mode 100644
index 00000000..77dd1773
--- /dev/null
+++ b/swh/deposit/templates/api.html
@@ -0,0 +1,17 @@
+{% extends "layout.html" %}
+{% load static %}
+{% block title %}The Software Heritage Deposit Api{% endblock %}
+
+{% block content %}
+
+Documentation
+
+
+
+
+
+{% endblock %}
diff --git a/swh/deposit/templates/homepage.html b/swh/deposit/templates/homepage.html
index 7d6c3316..063966b1 100644
--- a/swh/deposit/templates/homepage.html
+++ b/swh/deposit/templates/homepage.html
@@ -1,45 +1,42 @@
{% extends "layout.html" %}
{% load static %}
{% block title %}The Software Heritage Deposit{% endblock %}
{% block content %}
Welcome to the Software Heritage deposit
Overview
The long term goal of the Software Heritage initiative is
to collect all publicly available software in source
code form together with its development history, replicate it
massively to ensure its preservation,
and share it with everyone who needs it.
What's the deposit?
This is Software Heritage's
SWORD
2.0 Server implementation.
S.W.O.R.D (Simple Web-Service Offering Repository
Deposit) is an interoperability standard for digital
file deposit.
-This implementation will permit interaction between a client (a
-repository, e.g. hal) and a server
-(SWH repository) to
-permit deposits of software source code archives with associated
-metadata.
-
+This implementation allows a client (a repository, e.g. hal) to deposit
+software source archives and their associated metadata to the (SWH archive).
Documentation
{% endblock %}
diff --git a/swh/deposit/tests/__init__.py b/swh/deposit/tests/__init__.py
index f8ec3db7..2b34b26e 100644
--- a/swh/deposit/tests/__init__.py
+++ b/swh/deposit/tests/__init__.py
@@ -1,73 +1,42 @@
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.deposit.config import setup_django_for
from swh.deposit.config import SWHDefaultConfig # noqa
-from swh.loader.core.loader import BufferedLoader
-
TEST_CONFIG = {
'max_upload_size': 500,
'extraction_dir': '/tmp/swh-deposit/test/extraction-dir',
'checks': False,
'provider': {
'provider_name': '',
'provider_type': 'deposit_client',
'provider_url': '',
'metadata': {
}
},
'tool': {
'name': 'swh-deposit',
'version': '0.0.1',
'configuration': {
'sword_version': '2'
}
}
}
def parse_deposit_config_file(base_filename=None, config_filename=None,
additional_configs=None, global_config=True):
return TEST_CONFIG
-TEST_LOADER_CONFIG = {
- 'extraction_dir': '/tmp/swh-loader-tar/test/',
- 'working_dir': '/tmp/swh-loader-tar/test/working-dir',
- 'debug': False,
- 'storage': {
- 'cls': 'memory',
- 'args': {
- }
- },
- 'send_contents': True,
- 'send_directories': True,
- 'send_revisions': True,
- 'send_releases': True,
- 'send_snapshot': True,
-
- 'content_size_limit': 100 * 1024 * 1024,
- 'content_packet_size': 10,
- 'content_packet_size_bytes': 100 * 1024 * 1024,
- 'directory_packet_size': 10,
- 'revision_packet_size': 10,
- 'release_packet_size': 10,
-}
-
-
-def parse_loader_config_file(base_filename=None, config_filename=None,
- additional_configs=None, global_config=True):
- return TEST_LOADER_CONFIG
-
-
# monkey patch classes method permits to override, for tests purposes,
# the default configuration without side-effect, i.e do not load the
# configuration from disk
SWHDefaultConfig.parse_config_file = parse_deposit_config_file # type: ignore
-BufferedLoader.parse_config_file = parse_loader_config_file # type: ignore
+
setup_django_for('testing')
diff --git a/swh/deposit/tests/api/conftest.py b/swh/deposit/tests/api/conftest.py
index 20819c52..b348b291 100644
--- a/swh/deposit/tests/api/conftest.py
+++ b/swh/deposit/tests/api/conftest.py
@@ -1,108 +1,84 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import hashlib
import pytest
from django.urls import reverse
-from os import path, listdir
-from typing import Mapping
from swh.deposit.config import (
DEPOSIT_STATUS_DEPOSITED, COL_IRI, DEPOSIT_STATUS_VERIFIED
)
from swh.deposit.models import Deposit
from swh.deposit.parsers import parse_xml
from swh.deposit.api.private.deposit_check import SWHChecksDeposit
-@pytest.fixture
-def atom_dataset(datadir) -> Mapping[str, bytes]:
- """Compute the paths to atom files.
-
- Returns:
- Dict of atom name per content (bytes)
-
- """
- atom_path = path.join(datadir, 'atom')
- data = {}
- for filename in listdir(atom_path):
- filepath = path.join(atom_path, filename)
- with open(filepath, 'rb') as f:
- raw_content = f.read()
-
- # Keep the filename without extension
- atom_name = filename.split('.')[0]
- data[atom_name] = raw_content
-
- return data
-
-
@pytest.fixture
def ready_deposit_ok(partial_deposit_with_metadata):
"""Returns a deposit ready for checks (it will pass the checks).
"""
deposit = partial_deposit_with_metadata
deposit.status = DEPOSIT_STATUS_DEPOSITED
deposit.save()
return deposit
@pytest.fixture
def ready_deposit_verified(partial_deposit_with_metadata):
"""Returns a deposit ready for checks (it will pass the checks).
"""
deposit = partial_deposit_with_metadata
deposit.status = DEPOSIT_STATUS_VERIFIED
deposit.save()
return deposit
@pytest.fixture
def ready_deposit_only_metadata(partial_deposit_only_metadata):
"""Deposit in status ready that will fail the checks (because missing
archive).
"""
deposit = partial_deposit_only_metadata
deposit.status = DEPOSIT_STATUS_DEPOSITED
deposit.save()
return deposit
@pytest.fixture
def ready_deposit_invalid_archive(authenticated_client, deposit_collection):
url = reverse(COL_IRI, args=[deposit_collection.name])
data = b'some data which is clearly not a zip file'
md5sum = hashlib.md5(data).hexdigest()
# when
response = authenticated_client.post(
url,
content_type='application/zip', # as zip
data=data,
# + headers
CONTENT_LENGTH=len(data),
# other headers needs HTTP_ prefix to be taken into account
HTTP_SLUG='external-id-invalid',
HTTP_CONTENT_MD5=md5sum,
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
HTTP_CONTENT_DISPOSITION='attachment; filename=filename0')
response_content = parse_xml(response.content)
deposit_id = int(response_content['deposit_id'])
deposit = Deposit.objects.get(pk=deposit_id)
deposit.status = DEPOSIT_STATUS_DEPOSITED
deposit.save()
return deposit
@pytest.fixture
def swh_checks_deposit():
return SWHChecksDeposit()
diff --git a/swh/deposit/tests/cli/data/atom/codemeta-sample.xml b/swh/deposit/tests/cli/data/atom/codemeta-sample.xml
new file mode 100644
index 00000000..d804eff5
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/codemeta-sample.xml
@@ -0,0 +1,51 @@
+
+
+ %s
+ hal-01587361
+ https://hal.inria.fr/hal-01587361
+ https://hal.inria.fr/hal-01587361/document
+ https://hal.inria.fr/hal-01587361/file/AffectationRO-v1.0.0.zip
+ doi:10.5281/zenodo.438684
+ The assignment problem
+ AffectationRO
+ Gruenpeter, Morane
+ [INFO] Computer Science [cs]
+ [INFO.INFO-RO] Computer Science [cs]/Operations Research [cs.RO]
+ SOFTWARE
+ Project in OR: The assignment problemA java implementation for the assignment problem first release
+ description fr
+ 2015-06-01
+ 2017-10-19
+ en
+
+
+ url stable
+ Version sur hal
+ Version entre par lutilisateur
+ Mots-cls
+ Commentaire
+ Rfrence interne
+
+ Collaboration/Projet
+ nom du projet
+ id
+
+ Voir aussi
+ Financement
+ Projet ANR
+ Projet Europen
+ Platform/OS
+ Dpendances
+ Etat du dveloppement
+
+ license
+ url spdx
+
+ Outils de dveloppement- outil no1
+ Outils de dveloppement- outil no2
+ http://code.com
+ language 1
+ language 2
+
diff --git a/swh/deposit/tests/cli/data/atom/entry-data-badly-formatted.xml b/swh/deposit/tests/cli/data/atom/entry-data-badly-formatted.xml
new file mode 100644
index 00000000..25a417fb
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/entry-data-badly-formatted.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/swh/deposit/tests/cli/data/atom/entry-data-deposit-binary.xml b/swh/deposit/tests/cli/data/atom/entry-data-deposit-binary.xml
new file mode 100644
index 00000000..65b7f63b
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/entry-data-deposit-binary.xml
@@ -0,0 +1,29 @@
+
+
+ Title
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+ 2005-10-07T17:17:08Z
+ Contributor
+ The abstract
+
+
+ The abstract
+ Access Rights
+ Alternative Title
+ Date Available
+ Bibliographic Citation # noqa
+ Contributor
+ Description
+ Has Part
+ Has Version
+ Identifier
+ Is Part Of
+ Publisher
+ References
+ Rights Holder
+ Source
+ Title
+ Type
+
+
diff --git a/swh/deposit/tests/cli/data/atom/entry-data-empty-body.xml b/swh/deposit/tests/cli/data/atom/entry-data-empty-body.xml
new file mode 100644
index 00000000..e4caf44f
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/entry-data-empty-body.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/swh/deposit/tests/cli/data/atom/entry-data-ko.xml b/swh/deposit/tests/cli/data/atom/entry-data-ko.xml
new file mode 100644
index 00000000..3f5d8802
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/entry-data-ko.xml
@@ -0,0 +1,6 @@
+
+
+
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+
diff --git a/swh/deposit/tests/cli/data/atom/entry-data-minimal.xml b/swh/deposit/tests/cli/data/atom/entry-data-minimal.xml
new file mode 100644
index 00000000..9432ac0e
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/entry-data-minimal.xml
@@ -0,0 +1,4 @@
+
+
+ %s
+
diff --git a/swh/deposit/tests/cli/data/atom/entry-data-parsing-error-prone.xml b/swh/deposit/tests/cli/data/atom/entry-data-parsing-error-prone.xml
new file mode 100644
index 00000000..34710195
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/entry-data-parsing-error-prone.xml
@@ -0,0 +1,5 @@
+
+
+ Composing a Web of Audio Applications
+
+
diff --git a/swh/deposit/tests/cli/data/atom/entry-data0.xml b/swh/deposit/tests/cli/data/atom/entry-data0.xml
new file mode 100644
index 00000000..2b0ccc00
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/entry-data0.xml
@@ -0,0 +1,26 @@
+
+
+ Awesome Compiler
+ hal
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+ %s
+ 2017-10-07T15:17:08Z
+ some awesome author
+ something
+ awesome-compiler
+ This is an awesome compiler destined to
+awesomely compile stuff
+and other stuff
+ compiler,programming,language
+ 2005-10-07T17:17:08Z
+ 2005-10-07T17:17:08Z
+ release note
+ related link
+
+ Awesome
+ https://hoster.org/awesome-compiler
+ GNU/Linux
+ 0.0.1
+ running
+ all
+
diff --git a/swh/deposit/tests/cli/data/atom/entry-data1.xml b/swh/deposit/tests/cli/data/atom/entry-data1.xml
new file mode 100644
index 00000000..e4f415c7
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/entry-data1.xml
@@ -0,0 +1,24 @@
+
+
+ hal
+ urn:uuid:2225c695-cfb8-4ebb-aaaa-80da344efa6a
+ 2017-10-07T15:17:08Z
+ some awesome author
+ something
+ awesome-compiler
+ This is an awesome compiler destined to
+awesomely compile stuff
+and other stuff
+ compiler,programming,language
+ 2005-10-07T17:17:08Z
+ 2005-10-07T17:17:08Z
+ release note
+ related link
+
+ Awesome
+ https://hoster.org/awesome-compiler
+ GNU/Linux
+ 0.0.1
+ running
+ all
+
diff --git a/swh/deposit/tests/cli/data/atom/entry-data2.xml b/swh/deposit/tests/cli/data/atom/entry-data2.xml
new file mode 100644
index 00000000..73cfafeb
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/entry-data2.xml
@@ -0,0 +1,6 @@
+
+
+ some-external-id
+ https://hal-test.archives-ouvertes.fr/some-external-id
+ some awesome author
+
diff --git a/swh/deposit/tests/cli/data/atom/entry-data3.xml b/swh/deposit/tests/cli/data/atom/entry-data3.xml
new file mode 100644
index 00000000..c75d9739
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/entry-data3.xml
@@ -0,0 +1,6 @@
+
+
+ another one
+ no one
+ 2017-10-07T15:17:08Z
+
diff --git a/swh/deposit/tests/cli/data/atom/entry-update-in-place.xml b/swh/deposit/tests/cli/data/atom/entry-update-in-place.xml
new file mode 100644
index 00000000..1a7d7bbb
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/entry-update-in-place.xml
@@ -0,0 +1,7 @@
+
+
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa7b
+ Title
+ Type
+
diff --git a/swh/deposit/tests/cli/data/atom/error-with-decimal.xml b/swh/deposit/tests/cli/data/atom/error-with-decimal.xml
new file mode 100644
index 00000000..be002442
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/error-with-decimal.xml
@@ -0,0 +1,38 @@
+
+
+ Composing a Web of Audio Applications
+ hal
+ hal-01243065
+ hal-01243065
+ https://hal-test.archives-ouvertes.fr/hal-01243065
+ test
+
+
+ DSP programming,Web,Composability,Faust
+ 2017-05-03T16:08:47+02:00
+ The Web offers a great opportunity to share, deploy and use programs without installation difficulties. In this article we explore the idea of freely combining/composing real-time audio applications deployed on the Web using Faust audio DSP language.
+ 1
+ 10.4
+ phpstorm
+ stable
+
+ linux
+ php
+ python
+ C
+
+ GNU General Public License v3.0 only
+
+
+ CeCILL Free Software License Agreement v1.1
+
+
+ HAL
+ hal@ccsd.cnrs.fr
+
+
+ Someone Nice
+ someone@nice.fr
+ FFJ
+
+
diff --git a/swh/deposit/tests/cli/data/atom/metadata.xml b/swh/deposit/tests/cli/data/atom/metadata.xml
new file mode 100644
index 00000000..65f58543
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/metadata.xml
@@ -0,0 +1,32 @@
+
+
+ Composing a Web of Audio Applications
+ hal
+ hal-01243065
+ hal-01243065
+ https://hal-test.archives-ouvertes.fr/hal-01243065
+ test
+ DSP programming
+ this is the description
+ 1
+ phpstorm
+ stable
+ php
+ python
+ C
+
+ GNU General Public License v3.0 only
+
+
+ CeCILL Free Software License Agreement v1.1
+
+
+ HAL
+ hal@ccsd.cnrs.fr
+
+
+ Morane Gruenpeter
+
+%s
+
diff --git a/swh/deposit/tests/cli/data/atom/tei-sample.xml b/swh/deposit/tests/cli/data/atom/tei-sample.xml
new file mode 100644
index 00000000..cf2266af
--- /dev/null
+++ b/swh/deposit/tests/cli/data/atom/tei-sample.xml
@@ -0,0 +1 @@
+HAL TEI export of hal-01587083CCSDDistributed under a Creative Commons Attribution 4.0 International LicenseHAL API platform
questionnaire software metadataMoraneGruenpeter7de56c632362954fa84172cad80afe4einria.fr1556733MoraneGruenpeterf85a43a5fb4a2e0778a77e017f28c8fdgmail.com2017-09-29 11:21:322017-10-03 17:20:132017-10-03 17:20:132017-09-292017-09-29contributorMoraneGruenpeterf85a43a5fb4a2e0778a77e017f28c8fdgmail.comCCSDhal-01587083https://hal.inria.fr/hal-01587083gruenpeter:hal-0158708320172017questionnaire software metadataMoraneGruenpeter7de56c632362954fa84172cad80afe4einria.fr1556733EnglishComputer Science [cs]SoftwareIRILLInitiative pour la Recherche et l'Innovation sur le Logiciel Libre[https://www.irill.org/]Universite Pierre et Marie Curie - Paris 6UPMC4 place Jussieu - 75005 Paris[http://www.upmc.fr/]Institut National de Recherche en Informatique et en AutomatiqueInriaDomaine de VoluceauRocquencourt - BP 10578153 Le Chesnay Cedex[http://www.inria.fr/en/]Universite Paris Diderot - Paris 7UPD75 rue Thomas-Mann - 75205 Paris cedex 13[http://www.univ-paris-diderot.fr]
diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py
index 066f63d9..656ebf69 100644
--- a/swh/deposit/tests/cli/test_client.py
+++ b/swh/deposit/tests/cli/test_client.py
@@ -1,49 +1,323 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import pytest
-
+import contextlib
+import logging
+import os
+import re
from unittest.mock import MagicMock
+from click.testing import CliRunner
+import pytest
+
from swh.deposit.client import PublicApiDepositClient
-from swh.deposit.cli.client import _url, _client, _collection, InputError
+from swh.deposit.cli.client import (
+ generate_slug, _url, _client, _collection, InputError)
+from swh.deposit.cli import deposit as cli
+from ..conftest import TEST_USER
+
+
+EXAMPLE_SERVICE_DOCUMENT = {
+ 'service': {
+ 'workspace': {
+ 'collection': {
+ 'sword:name': 'softcol',
+ }
+ }
+ }
+}
+
+
+@pytest.fixture
+def slug():
+ return generate_slug()
+
+
+@pytest.fixture
+def client_mock(mocker, slug):
+ mocker.patch('swh.deposit.cli.client.generate_slug', return_value=slug)
+ mock_client = MagicMock()
+ mocker.patch(
+ 'swh.deposit.cli.client._client',
+ return_value=mock_client)
+ mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT
+ mock_client.deposit_create.return_value = '{"foo": "bar"}'
+ return mock_client
def test_url():
assert _url('http://deposit') == 'http://deposit/1'
assert _url('https://other/1') == 'https://other/1'
def test_client():
client = _client('http://deposit', 'user', 'pass')
assert isinstance(client, PublicApiDepositClient)
def test_collection_error():
mock_client = MagicMock()
mock_client.service_document.return_value = {
'error': 'something went wrong'
}
with pytest.raises(InputError) as e:
_collection(mock_client)
assert 'Service document retrieval: something went wrong' == str(e.value)
def test_collection_ok():
mock_client = MagicMock()
- mock_client.service_document.return_value = {
- 'service': {
- 'workspace': {
- 'collection': {
- 'sword:name': 'softcol',
- }
- }
- }
- }
+ mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT
collection_name = _collection(mock_client)
assert collection_name == 'softcol'
+
+
+def test_single_minimal_deposit(
+ sample_archive, mocker, caplog, client_mock, slug, tmp_path):
+ """ from:
+ https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit
+ """ # noqa
+
+ metadata_path = os.path.join(tmp_path, 'metadata.xml')
+ mocker.patch('swh.deposit.cli.client.tempfile.TemporaryDirectory',
+ return_value=contextlib.nullcontext(str(tmp_path)))
+
+ runner = CliRunner()
+ result = runner.invoke(cli, [
+ 'upload',
+ '--url', 'mock://deposit.swh/1',
+ '--username', TEST_USER['username'],
+ '--password', TEST_USER['password'],
+ '--name', 'test-project',
+ '--archive', sample_archive['path'],
+ '--author', 'Jane Doe',
+ ])
+
+ assert result.exit_code == 0, result.output
+ assert result.output == ''
+ assert caplog.record_tuples == [
+ ('swh.deposit.cli.client', logging.INFO, '{"foo": "bar"}'),
+ ]
+
+ client_mock.deposit_create.assert_called_once_with(
+ archive=sample_archive['path'],
+ collection='softcol', in_progress=False, metadata=metadata_path,
+ slug=slug)
+
+ with open(metadata_path) as fd:
+ assert fd.read() == f'''\
+
+
+\ttest-project
+\t{slug}
+\t
+\t\tJane Doe
+\t
+'''
+
+
+def test_metadata_validation(sample_archive, mocker, caplog, tmp_path):
+ """ from:
+ https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit
+ """ # noqa
+ slug = generate_slug()
+ mocker.patch('swh.deposit.cli.client.generate_slug', return_value=slug)
+ mock_client = MagicMock()
+ mocker.patch(
+ 'swh.deposit.cli.client._client',
+ return_value=mock_client)
+ mock_client.service_document.return_value = EXAMPLE_SERVICE_DOCUMENT
+ mock_client.deposit_create.return_value = '{"foo": "bar"}'
+
+ metadata_path = os.path.join(tmp_path, 'metadata.xml')
+ mocker.patch('swh.deposit.cli.client.tempfile.TemporaryDirectory',
+ return_value=contextlib.nullcontext(str(tmp_path)))
+ with open(metadata_path, 'a'):
+ pass # creates the file
+
+ runner = CliRunner()
+
+ # Test missing author
+ result = runner.invoke(cli, [
+ 'upload',
+ '--url', 'mock://deposit.swh/1',
+ '--username', TEST_USER['username'],
+ '--password', TEST_USER['password'],
+ '--name', 'test-project',
+ '--archive', sample_archive['path'],
+ ])
+
+ assert result.exit_code == 1, result.output
+ assert result.output == ''
+ assert len(caplog.record_tuples) == 1
+ (_logger, level, message) = caplog.record_tuples[0]
+ assert level == logging.ERROR
+ assert ' --author ' in message
+
+ # Clear mocking state
+ caplog.clear()
+ mock_client.reset_mock()
+
+ # Test missing name
+ result = runner.invoke(cli, [
+ 'upload',
+ '--url', 'mock://deposit.swh/1',
+ '--username', TEST_USER['username'],
+ '--password', TEST_USER['password'],
+ '--archive', sample_archive['path'],
+ '--author', 'Jane Doe',
+ ])
+
+ assert result.exit_code == 1, result.output
+ assert result.output == ''
+ assert len(caplog.record_tuples) == 1
+ (_logger, level, message) = caplog.record_tuples[0]
+ assert level == logging.ERROR
+ assert ' --name ' in message
+
+ # Clear mocking state
+ caplog.clear()
+ mock_client.reset_mock()
+
+ # Test both --metadata and --author
+ result = runner.invoke(cli, [
+ 'upload',
+ '--url', 'mock://deposit.swh/1',
+ '--username', TEST_USER['username'],
+ '--password', TEST_USER['password'],
+ '--archive', sample_archive['path'],
+ '--metadata', metadata_path,
+ '--author', 'Jane Doe',
+ ])
+
+ assert result.exit_code == 1, result.output
+ assert result.output == ''
+ assert len(caplog.record_tuples) == 1
+ (_logger, level, message) = caplog.record_tuples[0]
+ assert level == logging.ERROR
+ assert re.search('--metadata.*is incompatible with', message)
+
+ # Clear mocking state
+ caplog.clear()
+ mock_client.reset_mock()
+
+
+def test_single_deposit_slug_generation(
+ sample_archive, mocker, caplog, tmp_path, client_mock):
+ """ from:
+ https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit
+ """ # noqa
+ slug = 'my-slug'
+ collection = 'my-collection'
+
+ metadata_path = os.path.join(tmp_path, 'metadata.xml')
+ mocker.patch('swh.deposit.cli.client.tempfile.TemporaryDirectory',
+ return_value=contextlib.nullcontext(str(tmp_path)))
+
+ runner = CliRunner()
+ result = runner.invoke(cli, [
+ 'upload',
+ '--url', 'mock://deposit.swh/1',
+ '--username', TEST_USER['username'],
+ '--password', TEST_USER['password'],
+ '--name', 'test-project',
+ '--archive', sample_archive['path'],
+ '--slug', slug,
+ '--collection', collection,
+ '--author', 'Jane Doe',
+ ])
+
+ assert result.exit_code == 0, result.output
+ assert result.output == ''
+ assert caplog.record_tuples == [
+ ('swh.deposit.cli.client', logging.INFO, '{"foo": "bar"}'),
+ ]
+
+ client_mock.deposit_create.assert_called_once_with(
+ archive=sample_archive['path'],
+ collection=collection, in_progress=False, metadata=metadata_path,
+ slug=slug)
+
+ with open(metadata_path) as fd:
+ assert fd.read() == '''\
+
+
+\ttest-project
+\tmy-slug
+\t
+\t\tJane Doe
+\t
+'''
+
+
+def test_multisteps_deposit(
+ sample_archive, atom_dataset, mocker, caplog, datadir,
+ client_mock, slug):
+ """ from:
+ https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#multisteps-deposit
+ """ # noqa
+ slug = generate_slug()
+ mocker.patch('swh.deposit.cli.client.generate_slug', return_value=slug)
+
+ # https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#create-an-incomplete-deposit
+ client_mock.deposit_create.return_value = '{"deposit_id": "42"}'
+
+ runner = CliRunner()
+ result = runner.invoke(cli, [
+ 'upload',
+ '--url', 'mock://deposit.swh/1',
+ '--username', TEST_USER['username'],
+ '--password', TEST_USER['password'],
+ '--archive', sample_archive['path'],
+ '--partial',
+ ])
+
+ assert result.exit_code == 0, result.output
+ assert result.output == ''
+ assert caplog.record_tuples == [
+ ('swh.deposit.cli.client', logging.INFO, '{"deposit_id": "42"}'),
+ ]
+
+ client_mock.deposit_create.assert_called_once_with(
+ archive=sample_archive['path'],
+ collection='softcol', in_progress=True, metadata=None,
+ slug=slug)
+
+ # Clear mocking state
+ caplog.clear()
+ client_mock.reset_mock()
+
+ # https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#add-content-or-metadata-to-the-deposit
+
+ metadata_path = os.path.join(
+ datadir, 'atom', 'entry-data-deposit-binary.xml')
+
+ result = runner.invoke(cli, [
+ 'upload',
+ '--url', 'mock://deposit.swh/1',
+ '--username', TEST_USER['username'],
+ '--password', TEST_USER['password'],
+ '--metadata', metadata_path,
+ ])
+
+ assert result.exit_code == 0, result.output
+ assert result.output == ''
+ assert caplog.record_tuples == [
+ ('swh.deposit.cli.client', logging.INFO, '{"deposit_id": "42"}'),
+ ]
+
+ client_mock.deposit_create.assert_called_once_with(
+ archive=None,
+ collection='softcol', in_progress=False, metadata=metadata_path,
+ slug=slug)
+
+ # Clear mocking state
+ caplog.clear()
+ client_mock.reset_mock()
diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py
index 725bc20a..e95fad54 100644
--- a/swh/deposit/tests/conftest.py
+++ b/swh/deposit/tests/conftest.py
@@ -1,316 +1,338 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import base64
import pytest
import psycopg2
from django.urls import reverse
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from rest_framework import status
from rest_framework.test import APIClient
from typing import Mapping
from swh.scheduler.tests.conftest import * # noqa
from swh.deposit.parsers import parse_xml
from swh.deposit.config import (
COL_IRI, EDIT_SE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED,
DEPOSIT_STATUS_PARTIAL, DEPOSIT_STATUS_LOAD_SUCCESS,
DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_LOAD_FAILURE
)
from swh.deposit.tests.common import create_arborescence_archive
TEST_USER = {
'username': 'test',
'password': 'password',
'email': 'test@example.org',
'provider_url': 'https://hal-test.archives-ouvertes.fr/',
'domain': 'archives-ouvertes.fr/',
'collection': {
'name': 'test'
},
}
def execute_sql(sql):
"""Execute sql to postgres db"""
with psycopg2.connect(database='postgres') as conn:
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
cur = conn.cursor()
cur.execute(sql)
@pytest.hookimpl(tryfirst=True)
def pytest_load_initial_conftests(early_config, parser, args):
"""This hook is done prior to django loading.
Used to initialize the deposit's server db.
"""
import project.app.signals # type: ignore
def prepare_db(*args, **kwargs):
from django.conf import settings
db_name = 'tests'
# work around db settings for django
for k, v in [
('ENGINE', 'django.db.backends.postgresql'),
('NAME', 'tests'),
('USER', postgresql_proc.user), # noqa
('HOST', postgresql_proc.host), # noqa
('PORT', postgresql_proc.port), # noqa
]:
settings.DATABASES['default'][k] = v
execute_sql('DROP DATABASE IF EXISTS %s' % db_name)
execute_sql('CREATE DATABASE %s TEMPLATE template0' % db_name)
project.app.signals.something = prepare_db
@pytest.fixture(autouse=True, scope='session')
def swh_proxy():
    """Session fixture, automatically injected in all tests, which sets the
    http(s) proxy environment variables to a localhost address so that no
    outside connection takes place.

    """
    local_proxy = 'http://localhost:999'
    for variable in ('http_proxy', 'https_proxy'):
        os.environ[variable] = local_proxy
def create_deposit_collection(collection_name: str):
    """Return the deposit collection named collection_name, creating (and
    saving) it first when it does not exist yet.

    """
    from swh.deposit.models import DepositCollection
    manager = DepositCollection._default_manager
    try:
        return manager.get(name=collection_name)
    except DepositCollection.DoesNotExist:
        pass

    new_collection = DepositCollection(name=collection_name)
    new_collection.save()
    return new_collection
def deposit_collection_factory(
        collection_name=TEST_USER['collection']['name']):
    """Build a fixture which provides the deposit collection named
    collection_name (the test user's collection by default).

    """
    def _deposit_collection(db, collection_name=collection_name):
        return create_deposit_collection(collection_name)

    return pytest.fixture(_deposit_collection)
# Collection fixtures: the default test collection, and a second collection
# named 'another-collection'.
deposit_collection = deposit_collection_factory()
deposit_another_collection = deposit_collection_factory('another-collection')
@pytest.fixture
def deposit_user(db, deposit_collection):
    """Return the deposit client described by TEST_USER.

    When no client with that username exists yet, it is created out of
    TEST_USER and associated to the deposit_collection fixture.

    """
    from swh.deposit.models import DepositClient
    clients = DepositClient._default_manager
    try:
        return clients.get(username=TEST_USER['username'])
    except DepositClient.DoesNotExist:
        pass

    account = {
        field: TEST_USER[field]
        for field in (
            'username', 'email', 'password', 'provider_url', 'domain')
    }
    user = clients.create_user(**account)
    user.collections = [deposit_collection.id]
    user.save()
    return user
@pytest.fixture
def client():
    """Provide djangorestframework's APIClient, overriding the pytest-django
    client fixture which does not work for djangorestframework.

    """
    api_client = APIClient()  # <- drf's client
    return api_client
@pytest.fixture
def authenticated_client(client, deposit_user):
    """Yield the client configured with the test user's HTTP basic
    credentials.

    The Authorization header is built out of the deposit_user's username and
    the TEST_USER password. Once the test is done, the client is logged out.

    """
    _token = '%s:%s' % (deposit_user.username, TEST_USER['password'])
    token = base64.b64encode(_token.encode('utf-8'))
    authorization = 'Basic %s' % token.decode('utf-8')
    client.credentials(HTTP_AUTHORIZATION=authorization)
    yield client
    # teardown
    client.logout()
@pytest.fixture
def sample_archive(tmp_path):
    """Return the sample archive built by create_arborescence_archive within
    tmp_path.

    """
    root_path = str(tmp_path)  # pytest version limitation in previous version
    return create_arborescence_archive(
        root_path, 'archive1', 'file1', b'some content in file')
+@pytest.fixture
+def atom_dataset(datadir) -> Mapping[str, bytes]:
+ """Compute the paths to atom files.
+
+ Returns:
+ Dict of atom name per content (bytes)
+
+ """
+ atom_path = os.path.join(datadir, 'atom')
+ data = {}
+ for filename in os.listdir(atom_path):
+ filepath = os.path.join(atom_path, filename)
+ with open(filepath, 'rb') as f:
+ raw_content = f.read()
+
+ # Keep the filename without extension
+ atom_name = filename.split('.')[0]
+ data[atom_name] = raw_content
+
+ return data
+
+
def create_deposit(
        authenticated_client, collection_name: str, sample_archive,
        external_id: str, deposit_status=DEPOSIT_STATUS_DEPOSITED):
    """Create a skeleton shell deposit

    The sample archive is posted as a zip to the collection, then the
    resulting deposit is read back from its external_id and its status is
    forced to deposit_status when it differs.

    Returns:
        The deposit, with status deposit_status

    """
    from swh.deposit.models import Deposit

    headers = {
        'CONTENT_LENGTH': sample_archive['length'],
        'HTTP_SLUG': external_id,
        'HTTP_CONTENT_MD5': sample_archive['md5sum'],
        'HTTP_PACKAGING': 'http://purl.org/net/sword/package/SimpleZip',
        'HTTP_IN_PROGRESS': 'false',
        'HTTP_CONTENT_DISPOSITION': 'attachment; filename=%s' % (
            sample_archive['name']),
    }

    # when
    response = authenticated_client.post(
        reverse(COL_IRI, args=[collection_name]),
        content_type='application/zip',  # as zip
        data=sample_archive['data'],
        **headers)

    # then
    assert response.status_code == status.HTTP_201_CREATED
    deposit = Deposit._default_manager.get(external_id=external_id)

    needs_update = deposit.status != deposit_status
    if needs_update:
        deposit.status = deposit_status
        deposit.save()
    assert deposit.status == deposit_status
    return deposit
def create_binary_deposit(
        authenticated_client, collection_name: str, sample_archive,
        external_id: str, deposit_status: str = DEPOSIT_STATUS_DEPOSITED,
        atom_dataset: Optional[Mapping[str, bytes]] = None):
    """Create a deposit with both metadata and archive set. Then alters its
    status to `deposit_status`.

    Args:
        authenticated_client: client used to post the archive and metadata
        collection_name: name of the collection to deposit into
        sample_archive: archive description passed to create_deposit
        external_id: external identifier of the deposit
        deposit_status: final status to set on the deposit
        atom_dataset: raw atom contents indexed by name; the 'entry-data0'
          entry is used as a %-template filled with the external id

    Raises:
        KeyError: when atom_dataset holds no 'entry-data0' entry (raised
          before any deposit gets created)

    Returns:
        The deposit, with status deposit_status

    """
    if atom_dataset is None:
        # None instead of a dict literal shared as default across calls
        atom_dataset = {}
    # Looked up first so that a missing template fails before any request
    metadata_template = atom_dataset['entry-data0']

    deposit = create_deposit(
        authenticated_client, collection_name, sample_archive,
        external_id=external_id, deposit_status=DEPOSIT_STATUS_PARTIAL)

    response = authenticated_client.post(
        reverse(EDIT_SE_IRI, args=[collection_name, deposit.id]),
        content_type='application/atom+xml;type=entry',
        data=metadata_template % deposit.external_id.encode('utf-8'),
        HTTP_SLUG=deposit.external_id,
        HTTP_IN_PROGRESS='true')

    assert response.status_code == status.HTTP_201_CREATED
    # NOTE(review): `deposit` is still the instance obtained before the post
    # above, so this does not check what got stored -- confirm the intent
    assert deposit.status == DEPOSIT_STATUS_PARTIAL

    from swh.deposit.models import Deposit
    deposit = Deposit._default_manager.get(pk=deposit.id)

    if deposit.status != deposit_status:
        deposit.status = deposit_status
        deposit.save()

    assert deposit.status == deposit_status
    return deposit
def deposit_factory(deposit_status=DEPOSIT_STATUS_DEPOSITED):
    """Build a fixture creating a deposit with a specific status

    The deposit's external id is 'external-id-' followed by the status.

    """
    def _deposit(sample_archive, deposit_collection, authenticated_client,
                 deposit_status=deposit_status):
        return create_deposit(
            authenticated_client, deposit_collection.name, sample_archive,
            external_id='external-id-%s' % deposit_status,
            deposit_status=deposit_status,
        )

    return pytest.fixture()(_deposit)
# Deposit fixtures built by deposit_factory, one per deposit status
# (the factory's default status being DEPOSIT_STATUS_DEPOSITED).
deposited_deposit = deposit_factory()
rejected_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_REJECTED)
partial_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_PARTIAL)
verified_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_VERIFIED)
completed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS)
failed_deposit = deposit_factory(deposit_status=DEPOSIT_STATUS_LOAD_FAILURE)
@pytest.fixture
def partial_deposit_with_metadata(
        sample_archive, deposit_collection, authenticated_client,
        atom_dataset):
    """Provide a deposit holding both archive and metadata, with status
    'partial' and external id 'external-id-partial'

    """
    options = {
        'external_id': 'external-id-partial',
        'deposit_status': DEPOSIT_STATUS_PARTIAL,
        'atom_dataset': atom_dataset,
    }
    return create_binary_deposit(
        authenticated_client, deposit_collection.name, sample_archive,
        **options)
@pytest.fixture
def partial_deposit_only_metadata(
        deposit_collection, authenticated_client,
        atom_dataset):
    """Returns deposit with only metadata provided (no archive), status
    'partial'

    The 'entry-data1' atom entry is posted to the collection with the
    external id 'external-id-partial'; the deposit is then read back from
    the deposit_id found in the response.

    """
    response = authenticated_client.post(
        reverse(COL_IRI, args=[deposit_collection.name]),
        content_type='application/atom+xml;type=entry',
        data=atom_dataset['entry-data1'],
        HTTP_SLUG='external-id-partial',
        # sent as text, like the in-progress header of the other helpers of
        # this module (header values are strings, not booleans)
        HTTP_IN_PROGRESS='true')

    assert response.status_code == status.HTTP_201_CREATED

    response_content = parse_xml(response.content)
    deposit_id = response_content['deposit_id']

    from swh.deposit.models import Deposit
    deposit = Deposit._default_manager.get(pk=deposit_id)
    assert deposit.status == DEPOSIT_STATUS_PARTIAL
    return deposit
@pytest.fixture
def complete_deposit(sample_archive, deposit_collection, authenticated_client):
    """Provide a completed deposit (status load success) whose swh ids and
    their contexts are set.

    """
    deposit = create_deposit(
        authenticated_client, deposit_collection.name, sample_archive,
        external_id='external-id-complete',
        deposit_status=DEPOSIT_STATUS_LOAD_SUCCESS
    )
    origin_context = 'https://hal.archives-ouvertes.fr/hal-01727745'
    swh_id = 'swh:1:dir:42a13fc721c8716ff695d0d62fc851d641f3a12b'
    # NOTE(review): 'swh:rev:1:' does not follow the 'swh:1:dir:' layout of
    # the identifier above -- confirm this is the intended test value
    swh_anchor_id = 'swh:rev:1:548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10'

    deposit.swh_id = swh_id
    deposit.swh_id_context = '%s;%s' % (swh_id, origin_context)
    deposit.swh_anchor_id = swh_anchor_id
    deposit.swh_anchor_id_context = '%s;%s' % (swh_anchor_id, origin_context)
    deposit.save()
    return deposit
@pytest.fixture()
def tmp_path(tmp_path):
    """Override the tmp_path fixture to provide its value as a str."""
    path_as_str = str(tmp_path)  # issue with oldstable's pytest version
    return path_as_str
diff --git a/swh/deposit/tests/loader/conftest.py b/swh/deposit/tests/loader/conftest.py
index cd5081f0..addb2eb5 100644
--- a/swh/deposit/tests/loader/conftest.py
+++ b/swh/deposit/tests/loader/conftest.py
@@ -1,67 +1,61 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import re
import os
import pytest
import yaml
from functools import partial
from swh.core.pytest_plugin import get_response_cb
from swh.scheduler.tests.conftest import * # noqa
from swh.storage.tests.conftest import * # noqa
from swh.deposit.loader.checker import DepositChecker
-from swh.deposit.loader.loader import DepositLoader
@pytest.fixture(scope='session')  # type: ignore  # expected redefinition
def celery_includes():
    """Session fixture listing the deposit loader's task module name."""
    task_modules = ['swh.deposit.loader.tasks']
    return task_modules
@pytest.fixture
def swh_config(tmp_path, swh_storage_postgresql, monkeypatch):
    """Write a deposit configuration file and reference it through the
    SWH_CONFIG_FILENAME environment variable.

    The configuration declares the deposit url and a local storage, backed
    by the swh_storage_postgresql dsn and a memory objstorage.

    Returns:
        Path to the configuration file (deposit.yml within tmp_path)

    """
    objstorage = {'cls': 'memory', 'args': {}}
    storage = {
        'cls': 'local',
        'args': {
            'db': swh_storage_postgresql.dsn,
            'objstorage': objstorage,
        },
    }
    config = {
        'url': 'https://deposit.softwareheritage.org/',
        'storage': storage,
    }

    conffile = os.path.join(tmp_path, 'deposit.yml')
    serialized_config = yaml.dump(config)
    with open(conffile, 'w') as f:
        f.write(serialized_config)

    monkeypatch.setenv('SWH_CONFIG_FILENAME', conffile)
    return conffile
@pytest.fixture
def deposit_checker(swh_config):
    """Provide a DepositChecker, instantiated once swh_config is set up."""
    checker = DepositChecker()
    return checker
-@pytest.fixture
-def deposit_loader(swh_config):
- return DepositLoader()
-
-
@pytest.fixture
def requests_mock_datadir(datadir, requests_mock_datadir):
    """Extend the default requests_mock_datadir to deal with the put method:
    put requests on https urls are answered by get_response_cb bound to
    datadir.

    """
    https_urls = re.compile('https://')
    put_callback = partial(get_response_cb, datadir=datadir)
    requests_mock_datadir.put(https_urls, body=put_callback)
    return requests_mock_datadir
diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py
deleted file mode 100644
index 3c6924df..00000000
--- a/swh/deposit/tests/loader/test_loader.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# Copyright (C) 2017-2019 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-from swh.deposit.config import (
- PRIVATE_GET_RAW_CONTENT, PRIVATE_GET_DEPOSIT_METADATA, PRIVATE_PUT_DEPOSIT
-)
-from django.urls import reverse
-from swh.model.hashutil import hash_to_bytes
-
-from .common import get_stats, check_snapshot
-
-
-def test_inject_deposit_ready(
- swh_config, requests_mock_datadir, datadir, deposit_loader):
- """Load a deposit which is ready
-
- """
- args = ['test', 999]
- archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
- deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
- deposit_update_url = reverse(PRIVATE_PUT_DEPOSIT, args=args)
-
- # when
- res = deposit_loader.load(
- archive_url=archive_url,
- deposit_meta_url=deposit_meta_url,
- deposit_update_url=deposit_update_url)
-
- # then
- assert res['status'] == 'eventful'
- stats = get_stats(deposit_loader.storage)
-
- assert {
- 'content': 303,
- 'skipped_content': 0,
- 'directory': 12,
- 'origin': 1,
- 'origin_visit': 1,
- 'person': 1,
- 'release': 0,
- 'revision': 1,
- 'snapshot': 1,
- } == stats
-
- origin_url = 'https://hal-test.archives-ouvertes.fr/some-external-id'
- rev_id = 'b1bef04d90ef3ba645df4c4f945748c173a4e9a2'
- dir_id = 'bed9acbf2a4502499f659e65a2ab77096bd46a1d'
-
- expected_revision = {
- 'author': {
- 'name': b'Software Heritage',
- 'fullname': b'Software Heritage',
- 'email': b'robot@softwareheritage.org'},
- 'committer': {
- 'name': b'Software Heritage',
- 'fullname': b'Software Heritage',
- 'email': b'robot@softwareheritage.org'},
- 'committer_date': {
- 'negative_utc': 'false',
- 'offset': 0,
- 'timestamp': {'microseconds': 0, 'seconds': 1507389428}},
- 'date': {
- 'negative_utc': 'false',
- 'offset': 0,
- 'timestamp': {'microseconds': 0, 'seconds': 1507389428}},
- 'message': b'test: Deposit 999 in collection test',
- 'metadata': {
- '@xmlns': ['http://www.w3.org/2005/Atom'],
- 'author': ['some awesome author', 'another one', 'no one'],
- 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
- 'external_identifier': 'some-external-id',
- 'url': origin_url,
- 'original_artifact': [
- {
- 'name': 'archive.zip',
- 'archive_type': 'tar',
- 'length': 725946,
- 'blake2s256': '04fffd328441d216c92492ad72d37388d8c77889880b069151298786fd48d889', # noqa
- 'sha256': '31e066137a962676e89f69d1b65382de95a7ef7d914b8cb956f41ea72e0f516b', # noqa
- 'sha1': 'f7bebf6f9c62a2295e889f66e05ce9bfaed9ace3',
- 'sha1_git': 'cae6b33cc33faafd2d6bd86c6b4273f9338c69c2'
- }
- ]
- },
- 'synthetic': True,
- 'type': 'tar',
- 'parents': [],
- 'directory': hash_to_bytes(dir_id),
- 'id': hash_to_bytes(rev_id),
- }
-
- rev = next(deposit_loader.storage.revision_get([hash_to_bytes(rev_id)]))
- assert rev is not None
- assert expected_revision == rev
-
- expected_snapshot = {
- 'id': '823109c16f9948c6f88cc5dec8e278da1487f06d',
- 'branches': {
- 'master': {
- 'target': rev_id,
- 'target_type': 'revision'
- }
- }
- }
-
- check_snapshot(expected_snapshot, deposit_loader.storage)
diff --git a/swh/deposit/tests/loader/test_tasks.py b/swh/deposit/tests/loader/test_tasks.py
index c522d2fc..9c9516ba 100644
--- a/swh/deposit/tests/loader/test_tasks.py
+++ b/swh/deposit/tests/loader/test_tasks.py
@@ -1,39 +1,21 @@
-# Copyright (C) 2018 The Software Heritage developers
+# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from unittest.mock import patch
-@patch('swh.deposit.loader.loader.DepositLoader.load')
-def deposit_load(loader, swh_config, swh_app, celery_session_worker):
- loader.return_value = {'status': 'eventful'}
-
- res = swh_app.send_task(
- 'swh.deposit.loader.tasks.LoadDepositArchiveTsk',
- args=('archive_url', 'deposit_meta_url', 'deposit_update_url'))
- assert res
- res.wait()
- assert res.successful()
-
- assert res.result == {'status': 'eventful'}
- loader.assert_called_once_with(
- archive_url='archive_url',
- deposit_meta_url='deposit_meta_url',
- deposit_update_url='deposit_update_url')
-
-
@patch('swh.deposit.loader.checker.DepositChecker.check')
def deposit_check(checker, swh_config, swh_app, celery_session_worker):
    """Send the ChecksDepositTsk task and check it returns the (mocked)
    DepositChecker.check result, check being called once with the url.

    """
    # NOTE(review): the name lacks the `test_` prefix, so pytest presumably
    # does not collect this function -- confirm whether that is intended
    checker.return_value = {'status': 'uneventful'}
    task_name = 'swh.deposit.loader.tasks.ChecksDepositTsk'
    task_args = ['check_deposit_url']

    res = swh_app.send_task(task_name, args=task_args)
    assert res
    res.wait()
    assert res.successful()

    assert res.result == {'status': 'uneventful'}
    checker.assert_called_once_with('check_deposit_url')
diff --git a/swh/deposit/tests/test_gunicorn_config.py b/swh/deposit/tests/test_gunicorn_config.py
new file mode 100644
index 00000000..87b07119
--- /dev/null
+++ b/swh/deposit/tests/test_gunicorn_config.py
@@ -0,0 +1,47 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+from unittest.mock import patch
+
+import swh.deposit.gunicorn_config as gunicorn_config
+
+
+def test_post_fork_default():
+ with patch('sentry_sdk.init') as sentry_sdk_init:
+ gunicorn_config.post_fork(None, None)
+
+ sentry_sdk_init.assert_not_called()
+
+
+def test_post_fork_with_dsn_env():
+ django_integration = object() # unique object to check for equality
+ with patch('swh.deposit.gunicorn_config.DjangoIntegration',
+ new=lambda: django_integration):
+ with patch('sentry_sdk.init') as sentry_sdk_init:
+ with patch.dict(os.environ, {'SWH_SENTRY_DSN': 'test_dsn'}):
+ gunicorn_config.post_fork(None, None)
+
+ sentry_sdk_init.assert_called_once_with(
+ dsn='test_dsn',
+ integrations=[django_integration],
+ debug=False,
+ )
+
+
+def test_post_fork_debug():
+ django_integration = object() # unique object to check for equality
+ with patch('swh.deposit.gunicorn_config.DjangoIntegration',
+ new=lambda: django_integration):
+ with patch('sentry_sdk.init') as sentry_sdk_init:
+ with patch.dict(os.environ, {'SWH_SENTRY_DSN': 'test_dsn',
+ 'SWH_SENTRY_DEBUG': '1'}):
+ gunicorn_config.post_fork(None, None)
+
+ sentry_sdk_init.assert_called_once_with(
+ dsn='test_dsn',
+ integrations=[django_integration],
+ debug=True,
+ )
diff --git a/swh/deposit/urls.py b/swh/deposit/urls.py
index 355029b5..f0d26d13 100644
--- a/swh/deposit/urls.py
+++ b/swh/deposit/urls.py
@@ -1,30 +1,30 @@
-# Copyright (C) 2017-2018 The Software Heritage developers
+# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-"""swhdeposit URL Configuration
+"""SWH's main deposit URL Configuration
"""
from django.conf.urls import url, include
from django.shortcuts import render
from django.views.generic.base import RedirectView
from rest_framework.urlpatterns import format_suffix_patterns
# Permanent redirect to the static 32x32 swh logo; routed for favicon.ico
# requests in urlpatterns.
favicon_view = RedirectView.as_view(url='/static/img/icons/swh-logo-32x32.png',
                                    permanent=True)
def default_view(req):
    """Render the homepage.html template for the request."""
    template_name = "homepage.html"
    return render(req, template_name)
# Routes: the favicon redirect, the api urls and the private api urls (both
# included under the '1/' prefix), and the homepage (named 'home').
urlpatterns = [
    url(r'^favicon\.ico$', favicon_view),
    url(r'^1/', include('swh.deposit.api.urls')),
    url(r'^1/private/', include('swh.deposit.api.private.urls')),
    url(r'^$', default_view, name='home'),
]
# Pass the routes through rest_framework's format_suffix_patterns
urlpatterns = format_suffix_patterns(urlpatterns)
diff --git a/version.txt b/version.txt
index 4cada929..b0d69697 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-v0.0.78-0-ga51bf9b
\ No newline at end of file
+v0.0.79-0-g76fc968
\ No newline at end of file