diff --git a/requirements-server.txt b/requirements-server.txt
index d2631e2c..5a906819 100644
--- a/requirements-server.txt
+++ b/requirements-server.txt
@@ -1,2 +1,3 @@
Django < 3
djangorestframework
+setuptools
diff --git a/swh/deposit/api/__init__.py b/swh/deposit/api/__init__.py
index e69de29b..65c78f3d 100644
--- a/swh/deposit/api/__init__.py
+++ b/swh/deposit/api/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pkg_resources
+
+try:
+ __version__ = pkg_resources.get_distribution("swh.deposit").version
+except pkg_resources.DistributionNotFound:
+ __version__ = "devel"
diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
index f67f1306..4a5f388a 100644
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -1,192 +1,197 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from contextlib import contextmanager
import json
import os
import shutil
import tempfile
from django.http import FileResponse
from rest_framework import status
from swh.core import tarball
+from swh.deposit.api import __version__
from swh.deposit.utils import normalize_date
from swh.model import identifiers
from . import APIPrivateView, DepositReadMixin
from ...config import ARCHIVE_TYPE, SWH_PERSON
from ...models import Deposit
from ..common import APIGet
@contextmanager
def aggregate_tarballs(extraction_dir, archive_paths):
    """Aggregate multiple archives into one zip archive and yield its path.

    Every archive listed in ``archive_paths`` is uncompressed into one common
    directory, which is then re-packed as a single zip file. The temporary
    working directory (and the aggregated archive it contains) is removed
    when the context exits, including when building the archive fails.

    Args:
        extraction_dir (path): Directory under which the temporary working
          directory is created (it is created if missing)
        archive_paths ([str]): Deposit's archive paths

    Yields:
        Path to the aggregated zip archive. The file only exists for the
        duration of the ``with`` block.

    """
    os.makedirs(extraction_dir, 0o755, exist_ok=True)
    dir_path = tempfile.mkdtemp(prefix="swh.deposit-", dir=extraction_dir)

    # Everything after mkdtemp runs under the finally clause so that a failure
    # while uncompressing or re-packing does not leave dir_path behind.
    try:
        # root folder to build an aggregated tarball
        aggregated_tarball_rootdir = os.path.join(dir_path, "aggregate")
        os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True)

        # uncompress in a temporary location all archives
        for archive_path in archive_paths:
            tarball.uncompress(archive_path, aggregated_tarball_rootdir)

        # Aggregate into one big archive the multiple smaller ones
        temp_tarpath = shutil.make_archive(
            aggregated_tarball_rootdir, "zip", aggregated_tarball_rootdir
        )

        # the uncompressed tree is no longer needed once the zip is built
        shutil.rmtree(aggregated_tarball_rootdir)

        yield temp_tarpath
    finally:
        shutil.rmtree(dir_path)
class APIReadArchives(APIPrivateView, APIGet, DepositReadMixin):
    """Dedicated class to read a deposit's raw archives content.

    Only GET is supported.

    """

    def __init__(self):
        """Read the extraction directory from the configuration and make sure
        it exists.

        """
        super().__init__()
        self.extraction_dir = self.config["extraction_dir"]
        # exist_ok avoids the check-then-create race when several workers
        # instantiate the view at the same time
        os.makedirs(self.extraction_dir, exist_ok=True)

    def process_get(self, request, collection_name, deposit_id):
        """Build a unique archive from the multiple received and stream that
        content to the client.

        Args:
            request (Request):
            collection_name (str): Collection owning the deposit
            deposit_id (id): Deposit concerned by the reading

        Returns:
            FileResponse with status 200 and content type application/zip
            wrapping the aggregated archive

        """
        archive_paths = [
            r.archive.path
            for r in self._deposit_requests(deposit_id, request_type=ARCHIVE_TYPE)
        ]
        with aggregate_tarballs(self.extraction_dir, archive_paths) as path:
            # NOTE(review): the file is opened before the context manager
            # removes its directory on return; this relies on the open handle
            # staying readable afterwards (POSIX behaviour) -- confirm if
            # other platforms must be supported.
            return FileResponse(
                open(path, "rb"),
                status=status.HTTP_200_OK,
                content_type="application/zip",
            )
class APIReadMetadata(APIPrivateView, APIGet, DepositReadMixin):
"""Class in charge of aggregating metadata on a deposit.
"""
def __init__(self):
super().__init__()
self.provider = self.config["provider"]
- self.tool = self.config["tool"]
+ self.tool = {
+ "name": "swh-deposit",
+ "version": __version__,
+ "configuration": {"sword_version": "2"},
+ }
def _normalize_dates(self, deposit, metadata):
    """Compute the (author date, committer date) pair for a deposit.

    The author date is read from ``codemeta:dateCreated`` and the committer
    date from ``codemeta:datePublished``. When only one of the two is
    provided it is used for both; when neither is provided, the deposit's
    ``complete_date`` is used for both.

    Args:
        deposit (Deposit): Deposit model representation
        metadata (Dict): Metadata dict representation

    Returns:
        Tuple of author date, committer date, each passed through
        ``normalize_date``.

    """
    published = metadata.get("codemeta:datePublished")
    created = metadata.get("codemeta:dateCreated")

    # Each date falls back on the other one first, then on the completion
    # date of the deposit (only looked up when both are missing).
    author_date = created or published or deposit.complete_date
    commit_date = published or created or deposit.complete_date

    return (normalize_date(author_date), normalize_date(commit_date))
def metadata_read(self, deposit):
    """Read and aggregate multiple data on deposit into one unified data
    dictionary.

    Args:
        deposit (Deposit): Deposit concerned by the data aggregation.

    Returns:
        Dictionary with three entries: ``origin`` (type and url of the
        deposit), ``origin_metadata`` (provider, tool and raw metadata) and
        ``deposit`` (id, client, collection, author/committer with their
        dates, and the parent revisions).

    """
    metadata = self._metadata_get(deposit)
    author_date, commit_date = self._normalize_dates(deposit, metadata)

    # Build a per-deposit provider description instead of writing into
    # self.provider: that dict comes from the view configuration and must not
    # keep the client information of a previously read deposit.
    provider = dict(self.provider)
    provider["provider_name"] = deposit.client.last_name
    provider["provider_url"] = deposit.client.provider_url

    if deposit.parent:
        # the parent revision is the object id of the parent deposit's swhid
        swhid = identifiers.parse_swhid(deposit.parent.swh_id)
        parents = [swhid.object_id]
    else:
        parents = []

    return {
        "origin": {"type": "deposit", "url": deposit.origin_url},
        "origin_metadata": {
            "provider": provider,
            "tool": self.tool,
            "metadata": metadata,
        },
        "deposit": {
            "id": deposit.id,
            "client": deposit.client.username,
            "collection": deposit.collection.name,
            "author": SWH_PERSON,
            "author_date": author_date,
            "committer": SWH_PERSON,
            "committer_date": commit_date,
            "revision_parents": parents,
        },
    }
def process_get(self, request, collection_name, deposit_id):
    """Return the aggregated metadata of a deposit, serialized as JSON.

    Args:
        request (Request):
        collection_name (str): Collection owning the deposit
        deposit_id (id): Deposit whose metadata is read

    Returns:
        Tuple status, content, content-type. The content is the JSON dump of
        ``metadata_read``'s result, or an empty dict when that result is
        empty.

    """
    deposit = Deposit.objects.get(pk=deposit_id)
    aggregated = self.metadata_read(deposit)
    content = json.dumps(aggregated) if aggregated else {}
    return status.HTTP_200_OK, content, "application/json"
diff --git a/swh/deposit/tests/api/test_deposit_private_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py
index b04fb3dd..a66336d4 100644
--- a/swh/deposit/tests/api/test_deposit_private_read_metadata.py
+++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py
@@ -1,550 +1,551 @@
-# Copyright (C) 2017-2019 The Software Heritage developers
+# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.urls import reverse
from rest_framework import status
+from swh.deposit.api import __version__
from swh.deposit.config import EDIT_SE_IRI, PRIVATE_GET_DEPOSIT_METADATA, SWH_PERSON
from swh.deposit.models import Deposit
PRIVATE_GET_DEPOSIT_METADATA_NC = PRIVATE_GET_DEPOSIT_METADATA + "-nc"
def private_get_raw_url_endpoints(collection, deposit):
    """Return the 2 private metadata urls to check for a deposit (one with
    the collection name, one without).

    ``deposit`` is either a deposit id (int) or an object exposing ``id``.

    """
    if isinstance(deposit, int):
        deposit_id = deposit
    else:
        deposit_id = deposit.id

    url_with_collection = reverse(
        PRIVATE_GET_DEPOSIT_METADATA, args=[collection.name, deposit_id]
    )
    url_without_collection = reverse(
        PRIVATE_GET_DEPOSIT_METADATA_NC, args=[deposit_id]
    )
    return [url_with_collection, url_without_collection]
def update_deposit(authenticated_client, collection, deposit, atom_dataset):
    """Update the deposit with the "entry-data2" then "entry-data3" entries
    of the atom dataset, and return the deposit.

    """
    dataset_keys = ("entry-data2", "entry-data3")
    for key in dataset_keys:
        entry = atom_dataset[key]
        update_deposit_with_metadata(authenticated_client, collection, deposit, entry)
    return deposit
def update_deposit_with_metadata(authenticated_client, collection, deposit, metadata):
    """Post ``metadata`` as an atom entry on the deposit's edit url, check
    that the answer is a 201 (created) and return the deposit.

    """
    edit_url = reverse(EDIT_SE_IRI, args=[collection.name, deposit.id])
    response = authenticated_client.post(
        edit_url,
        data=metadata,
        content_type="application/atom+xml;type=entry",
        HTTP_SLUG=deposit.external_id,
        HTTP_IN_PROGRESS=True,
    )
    assert response.status_code == status.HTTP_201_CREATED
    return deposit
def test_read_metadata(
authenticated_client, deposit_collection, partial_deposit, atom_dataset
):
"""Private metadata read api to existing deposit should return metadata
"""
deposit = partial_deposit
deposit.external_id = "some-external-id"
deposit.save()
deposit = update_deposit(
authenticated_client, deposit_collection, deposit, atom_dataset
)
for url in private_get_raw_url_endpoints(deposit_collection, deposit):
response = authenticated_client.get(url)
assert response.status_code == status.HTTP_200_OK
assert response._headers["content-type"][1] == "application/json"
data = response.json()
expected_meta = {
"origin": {
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id",
},
"origin_metadata": {
"metadata": {
"@xmlns": ["http://www.w3.org/2005/Atom"],
"author": ["some awesome author", "another one", "no one"],
"codemeta:dateCreated": "2017-10-07T15:17:08Z",
"external_identifier": "some-external-id",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa
},
"provider": {
"metadata": {},
"provider_name": "",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
},
"tool": {
"configuration": {"sword_version": "2"},
"name": "swh-deposit",
- "version": "0.0.1",
+ "version": __version__,
},
},
"deposit": {
"author": SWH_PERSON,
"committer": SWH_PERSON,
"committer_date": {
"negative_utc": False,
"offset": 0,
"timestamp": {"microseconds": 0, "seconds": 1507389428},
},
"author_date": {
"negative_utc": False,
"offset": 0,
"timestamp": {"microseconds": 0, "seconds": 1507389428},
},
"client": "test",
"id": deposit.id,
"collection": "test",
"revision_parents": [],
},
}
assert data == expected_meta
def test_read_metadata_revision_with_parent(
authenticated_client, deposit_collection, partial_deposit, atom_dataset
):
"""Private read metadata to a deposit (with parent) returns metadata
"""
deposit = partial_deposit
deposit.external_id = "some-external-id"
deposit.save()
deposit = update_deposit(
authenticated_client, deposit_collection, deposit, atom_dataset
)
rev_id = "da78a9d4cf1d5d29873693fd496142e3a18c20fa"
swh_id = "swh:1:rev:%s" % rev_id
fake_parent = Deposit(
swh_id=swh_id, client=deposit.client, collection=deposit.collection
)
fake_parent.save()
deposit.parent = fake_parent
deposit.save()
for url in private_get_raw_url_endpoints(deposit_collection, deposit):
response = authenticated_client.get(url)
assert response.status_code == status.HTTP_200_OK
assert response._headers["content-type"][1] == "application/json"
data = response.json()
expected_meta = {
"origin": {
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id",
},
"origin_metadata": {
"metadata": {
"@xmlns": ["http://www.w3.org/2005/Atom"],
"author": ["some awesome author", "another one", "no one"],
"codemeta:dateCreated": "2017-10-07T15:17:08Z",
"external_identifier": "some-external-id",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa
},
"provider": {
"metadata": {},
"provider_name": "",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
},
"tool": {
"configuration": {"sword_version": "2"},
"name": "swh-deposit",
- "version": "0.0.1",
+ "version": __version__,
},
},
"deposit": {
"author": SWH_PERSON,
"committer": SWH_PERSON,
"committer_date": {
"negative_utc": False,
"offset": 0,
"timestamp": {"microseconds": 0, "seconds": 1507389428},
},
"author_date": {
"negative_utc": False,
"offset": 0,
"timestamp": {"microseconds": 0, "seconds": 1507389428},
},
"client": "test",
"id": deposit.id,
"collection": "test",
"revision_parents": [rev_id],
},
}
assert data == expected_meta
def test_read_metadata_3(
authenticated_client, deposit_collection, partial_deposit, atom_dataset
):
"""date(Created|Published) provided, uses author/committer date
"""
deposit = partial_deposit
deposit.external_id = "hal-01243065"
deposit.save()
deposit = update_deposit(
authenticated_client, deposit_collection, deposit, atom_dataset
)
# add metadata to the deposit with datePublished and dateCreated
codemeta_entry_data = (
atom_dataset["metadata"]
% """
2015-04-06T17:08:47+02:00
2017-05-03T16:08:47+02:00
"""
)
update_deposit_with_metadata(
authenticated_client, deposit_collection, deposit, codemeta_entry_data
)
for url in private_get_raw_url_endpoints(deposit_collection, deposit):
response = authenticated_client.get(url)
assert response.status_code == status.HTTP_200_OK
assert response._headers["content-type"][1] == "application/json"
data = response.json()
metadata = {
"@xmlns": ["http://www.w3.org/2005/Atom"],
"@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
"author": [
"some awesome author",
"another one",
"no one",
{"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
],
"client": "hal",
"codemeta:applicationCategory": "test",
"codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
"codemeta:dateCreated": [
"2017-10-07T15:17:08Z",
"2015-04-06T17:08:47+02:00",
],
"codemeta:datePublished": "2017-05-03T16:08:47+02:00",
"codemeta:description": "this is the description",
"codemeta:developmentStatus": "stable",
"codemeta:keywords": "DSP programming",
"codemeta:license": [
{"codemeta:name": "GNU General Public License v3.0 only"},
{
"codemeta:name": "CeCILL "
"Free "
"Software "
"License "
"Agreement "
"v1.1"
},
],
"codemeta:programmingLanguage": ["php", "python", "C"],
"codemeta:runtimePlatform": "phpstorm",
"codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", # noqa
"codemeta:version": "1",
"external_identifier": ["some-external-id", "hal-01243065"],
"id": "hal-01243065",
"title": "Composing a Web of Audio " "Applications",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id",
}
expected_meta = {
"origin": {
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/hal-01243065",
},
"origin_metadata": {
"metadata": metadata,
"provider": {
"metadata": {},
"provider_name": "",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
},
"tool": {
"configuration": {"sword_version": "2"},
"name": "swh-deposit",
- "version": "0.0.1",
+ "version": __version__,
},
},
"deposit": {
"author": SWH_PERSON,
"committer": SWH_PERSON,
"committer_date": {
"negative_utc": False,
"offset": 120,
"timestamp": {"microseconds": 0, "seconds": 1493820527},
},
"author_date": {
"negative_utc": False,
"offset": 0,
"timestamp": {"microseconds": 0, "seconds": 1507389428},
},
"client": deposit_collection.name,
"id": deposit.id,
"collection": deposit_collection.name,
"revision_parents": [],
},
}
assert data == expected_meta
def test_read_metadata_4(
authenticated_client, deposit_collection, atom_dataset, partial_deposit
):
"""dateCreated/datePublished not provided, revision uses complete_date
"""
deposit = partial_deposit
codemeta_entry_data = atom_dataset["metadata"] % ""
deposit = update_deposit_with_metadata(
authenticated_client, deposit_collection, deposit, codemeta_entry_data
)
# will use the deposit completed date as fallback date
deposit.complete_date = "2016-04-06"
deposit.save()
for url in private_get_raw_url_endpoints(deposit_collection, deposit):
response = authenticated_client.get(url)
assert response.status_code == status.HTTP_200_OK
assert response._headers["content-type"][1] == "application/json"
data = response.json()
metadata = {
"@xmlns": "http://www.w3.org/2005/Atom",
"@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
"author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
"client": "hal",
"codemeta:applicationCategory": "test",
"codemeta:author": {"codemeta:name": "Morane " "Gruenpeter"},
"codemeta:description": "this is the " "description",
"codemeta:developmentStatus": "stable",
"codemeta:keywords": "DSP programming",
"codemeta:license": [
{
"codemeta:name": "GNU "
"General "
"Public "
"License "
"v3.0 "
"only"
},
{
"codemeta:name": "CeCILL "
"Free "
"Software "
"License "
"Agreement "
"v1.1"
},
],
"codemeta:programmingLanguage": ["php", "python", "C"],
"codemeta:runtimePlatform": "phpstorm",
"codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065",
"codemeta:version": "1",
"external_identifier": "hal-01243065",
"id": "hal-01243065",
"title": "Composing a Web of Audio " "Applications",
}
expected_origin = {
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/%s" % (deposit.external_id),
}
expected_origin_metadata = {
"metadata": metadata,
"provider": {
"metadata": {},
"provider_name": "",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
},
"tool": {
"configuration": {"sword_version": "2"},
"name": "swh-deposit",
- "version": "0.0.1",
+ "version": __version__,
},
}
expected_deposit_info = {
"author": SWH_PERSON,
"committer": SWH_PERSON,
"committer_date": {
"negative_utc": False,
"offset": 0,
"timestamp": {"microseconds": 0, "seconds": 1459900800},
},
"author_date": {
"negative_utc": False,
"offset": 0,
"timestamp": {"microseconds": 0, "seconds": 1459900800},
},
"client": deposit_collection.name,
"id": deposit.id,
"collection": deposit_collection.name,
"revision_parents": [],
}
expected_meta = {
"origin": expected_origin,
"origin_metadata": expected_origin_metadata,
"deposit": expected_deposit_info,
}
assert data == expected_meta
def test_read_metadata_5(
authenticated_client, deposit_collection, partial_deposit, atom_dataset
):
"""dateCreated/datePublished provided, revision uses author/committer
date
If multiple dateCreated provided, the first occurrence (of
dateCreated) is selected. If multiple datePublished provided,
the first occurrence (of datePublished) is selected.
"""
deposit = partial_deposit
# add metadata to the deposit with multiple datePublished/dateCreated
codemeta_entry_data = (
atom_dataset["metadata"]
% """
2015-04-06T17:08:47+02:00
2017-05-03T16:08:47+02:00
2016-04-06T17:08:47+02:00
2018-05-03T16:08:47+02:00
"""
)
deposit = update_deposit_with_metadata(
authenticated_client, deposit_collection, deposit, codemeta_entry_data
)
for url in private_get_raw_url_endpoints(deposit_collection, deposit):
response = authenticated_client.get(url)
assert response.status_code == status.HTTP_200_OK
assert response._headers["content-type"][1] == "application/json"
data = response.json()
expected_origin = {
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/external-id-partial",
}
metadata = {
"@xmlns": "http://www.w3.org/2005/Atom",
"@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
"author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
"client": "hal",
"codemeta:applicationCategory": "test",
"codemeta:author": {"codemeta:name": "Morane " "Gruenpeter"},
"codemeta:dateCreated": [
"2015-04-06T17:08:47+02:00",
"2016-04-06T17:08:47+02:00",
],
"codemeta:datePublished": [
"2017-05-03T16:08:47+02:00",
"2018-05-03T16:08:47+02:00",
],
"codemeta:description": "this is the description",
"codemeta:developmentStatus": "stable",
"codemeta:keywords": "DSP programming",
"codemeta:license": [
{
"codemeta:name": "GNU "
"General "
"Public "
"License "
"v3.0 "
"only"
},
{
"codemeta:name": "CeCILL "
"Free "
"Software "
"License "
"Agreement "
"v1.1"
},
],
"codemeta:programmingLanguage": ["php", "python", "C"],
"codemeta:runtimePlatform": "phpstorm",
"codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", # noqa
"codemeta:version": "1",
"external_identifier": "hal-01243065",
"id": "hal-01243065",
"title": "Composing a Web of Audio " "Applications",
}
expected_origin_metadata = {
"metadata": metadata,
"provider": {
"metadata": {},
"provider_name": "",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
},
"tool": {
"configuration": {"sword_version": "2"},
"name": "swh-deposit",
- "version": "0.0.1",
+ "version": __version__,
},
}
expected_deposit_info = {
"author": SWH_PERSON,
"committer": SWH_PERSON,
"committer_date": {
"negative_utc": False,
"offset": 120,
"timestamp": {"microseconds": 0, "seconds": 1493820527},
},
"author_date": {
"negative_utc": False,
"offset": 120,
"timestamp": {"microseconds": 0, "seconds": 1428332927},
},
"client": deposit_collection.name,
"id": deposit.id,
"collection": deposit_collection.name,
"revision_parents": [],
}
expected_meta = {
"origin": expected_origin,
"origin_metadata": expected_origin_metadata,
"deposit": expected_deposit_info,
}
assert data == expected_meta
def test_access_to_nonexisting_deposit_returns_404_response(
    authenticated_client, deposit_collection,
):
    """Read unknown deposit should return a 404 response

    """
    unknown_id = 999

    # Precondition: fail right away if a deposit with that id exists, so that
    # a failure points at the test data rather than at the endpoint.
    assert not Deposit.objects.filter(pk=unknown_id).exists()

    for url in private_get_raw_url_endpoints(deposit_collection, unknown_id):
        response = authenticated_client.get(url)
        assert response.status_code == status.HTTP_404_NOT_FOUND
        msg = "Deposit with id %s does not exist" % unknown_id
        assert msg in response.content.decode("utf-8")
diff --git a/swh/deposit/tests/test_init.py b/swh/deposit/tests/test_init.py
new file mode 100644
index 00000000..88fca573
--- /dev/null
+++ b/swh/deposit/tests/test_init.py
@@ -0,0 +1,10 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def test_version():
+ from swh.deposit.api import __version__
+
+ assert __version__ is not None