diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 The Software Heritage developers
+# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -9,6 +9,7 @@
import tempfile
from contextlib import contextmanager
+from dateutil import parser
from django.http import FileResponse
from rest_framework import status
@@ -136,6 +137,45 @@
if client_domain in metadata[field]:
return metadata[field]
+    def _prepare_date(self, date):
+        """Prepare date fields as normalized swh date
+
+        If date is a list, its first element is elected. If date is
+        (then) a string, it is parsed into a datetime through
+        dateutil; when the parsed date carries no timezone
+        information (e.g. a date-only value such as '2017-10-07'), it
+        is assumed to be expressed in UTC.
+
+        Args:
+            date: Raw date (list, str, or any value that
+                identifiers.normalize_timestamp accepts as is)
+
+        Returns:
+            The result of identifiers.normalize_timestamp on that date
+
+        """
+        from datetime import timezone
+
+        if isinstance(date, list):
+            date = date[0]
+        if isinstance(date, str):
+            date = parser.parse(date)
+            if date.utcoffset() is None:
+                # NOTE(review): normalize_timestamp is expected to
+                # reject datetimes without timezone (confirm against
+                # swh.model); default to UTC rather than failing the
+                # whole metadata read on a date-only client value
+                date = date.replace(tzinfo=timezone.utc)
+
+        return identifiers.normalize_timestamp(date)
+
+    def _compute_date(self, deposit, metadata):
+        """Derive the (author date, committer date) pair of the revision.
+
+        'codemeta:dateCreated' feeds the author date and
+        'codemeta:datePublished' the committer date. When only one of
+        the two is provided, it is used for both dates. When none is
+        provided, the deposit's complete_date is used for both.
+
+        Args:
+            deposit (Deposit): Deposit model representation
+            metadata (Dict): Metadata dict representation
+
+        Returns:
+            Tuple of author date, committer date. Each of those dates
+            went through self._prepare_date.
+
+        """
+        created = metadata.get('codemeta:dateCreated')
+        published = metadata.get('codemeta:datePublished')
+
+        # each date falls back on the other one, then on the date the
+        # deposit was completed
+        raw_author_date = created or published or deposit.complete_date
+        raw_commit_date = published or created or deposit.complete_date
+
+        return (
+            self._prepare_date(raw_author_date),
+            self._prepare_date(raw_commit_date),
+        )
+
def metadata_read(self, deposit):
"""Read and aggregate multiple data on deposit into one unified data
dictionary.
@@ -169,12 +209,13 @@
revision_type = 'tar'
revision_msg = '%s: Deposit %s in collection %s' % (
fullname, deposit.id, deposit.collection.name)
- complete_date = identifiers.normalize_timestamp(deposit.complete_date)
+
+ author_date, commit_date = self._compute_date(deposit, metadata)
data['revision'] = {
'synthetic': True,
- 'date': complete_date,
- 'committer_date': complete_date,
+ 'date': author_date,
+ 'committer_date': commit_date,
'author': author_committer,
'committer': author_committer,
'type': revision_type,
diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py
--- a/swh/deposit/tests/api/test_deposit_read_metadata.py
+++ b/swh/deposit/tests/api/test_deposit_read_metadata.py
@@ -50,6 +50,7 @@
'metadata': {
'@xmlns': ['http://www.w3.org/2005/Atom'],
'author': ['some awesome author', 'another one', 'no one'],
+ 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
'external_identifier': 'some-external-id',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
@@ -70,15 +71,30 @@
},
'revision': {
'synthetic': True,
- 'committer_date': None,
+ 'committer_date': {
+ 'timestamp': {
+ 'seconds': 1507389428,
+ 'microseconds': 0
+ },
+ 'offset': 0,
+ 'negative_utc': False
+ },
'message': 'hal: Deposit %s in collection hal' % deposit_id,
'author': SWH_PERSON,
'committer': SWH_PERSON,
- 'date': None,
+ 'date': {
+ 'timestamp': {
+ 'seconds': 1507389428,
+ 'microseconds': 0
+ },
+ 'offset': 0,
+ 'negative_utc': False
+ },
'metadata': {
'@xmlns': ['http://www.w3.org/2005/Atom'],
'author': ['some awesome author', 'another one', 'no one'],
'external_identifier': 'some-external-id',
+ 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
},
@@ -135,6 +151,7 @@
'metadata': {
'@xmlns': ['http://www.w3.org/2005/Atom'],
'author': ['some awesome author', 'another one', 'no one'],
+ 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
'external_identifier': 'some-external-id',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
@@ -155,8 +172,22 @@
},
'revision': {
'synthetic': True,
- 'date': None,
- 'committer_date': None,
+ 'date': {
+ 'timestamp': {
+ 'seconds': 1507389428,
+ 'microseconds': 0
+ },
+ 'offset': 0,
+ 'negative_utc': False
+ },
+ 'committer_date': {
+ 'timestamp': {
+ 'seconds': 1507389428,
+ 'microseconds': 0
+ },
+ 'offset': 0,
+ 'negative_utc': False
+ },
'author': SWH_PERSON,
'committer': SWH_PERSON,
'type': 'tar',
@@ -164,6 +195,7 @@
'metadata': {
'@xmlns': ['http://www.w3.org/2005/Atom'],
'author': ['some awesome author', 'another one', 'no one'],
+ 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
'external_identifier': 'some-external-id',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
@@ -175,6 +207,482 @@
self.assertEqual(data, expected_meta)
+ def test_read_metadata_3(self):
+ """dateCreated/datePublished provided, revision uses author/committer date
+
+ """
+ # add metadata to the deposit with datePublished and dateCreated
+ codemeta_entry_data = b"""
+
+ Composing a Web of Audio Applications
+ hal
+ hal-01243065
+ hal-01243065
+ https://hal-test.archives-ouvertes.fr/hal-01243065
+ test
+ DSP programming,Web
+ 2015-04-06T17:08:47+02:00
+ this is the description
+ 1
+ phpstorm
+ stable
+ php
+ python
+ C
+ 2017-05-03T16:08:47+02:00
+
+ GNU General Public License v3.0 only
+
+
+ CeCILL Free Software License Agreement v1.1
+
+
+ HAL
+ hal@ccsd.cnrs.fr
+
+
+ Morane Gruenpeter
+
+""" # noqa
+
+ deposit_id = self.create_deposit_partial_with_data_in_args(
+ codemeta_entry_data)
+
+ url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
+ args=[self.collection.name, deposit_id])
+
+ response = self.client.get(url)
+
+ self.assertEqual(response.status_code,
+ status.HTTP_200_OK)
+ self.assertEqual(response._headers['content-type'][1],
+ 'application/json')
+ data = response.json()
+
+ expected_origin = {
+ 'type': 'deposit',
+ 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065'
+ }
+ expected_metadata = {
+ '@xmlns': 'http://www.w3.org/2005/Atom',
+ '@xmlns:codemeta':
+ 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0',
+ 'author': {
+ 'email': 'hal@ccsd.cnrs.fr',
+ 'name': 'HAL'
+ },
+ 'client': 'hal',
+ 'codemeta:applicationCategory': 'test',
+ 'codemeta:author': {
+ 'codemeta:name': 'Morane Gruenpeter'
+ },
+ 'codemeta:dateCreated': '2015-04-06T17:08:47+02:00',
+ 'codemeta:datePublished': '2017-05-03T16:08:47+02:00',
+ 'codemeta:description': 'this is the description',
+ 'codemeta:developmentStatus': 'stable',
+ 'codemeta:keywords': 'DSP programming,Web',
+ 'codemeta:license': [
+ {
+ 'codemeta:name': 'GNU General Public License v3.0 only'
+ },
+ {
+ 'codemeta:name':
+ 'CeCILL Free Software License Agreement v1.1'
+ }
+ ],
+ 'codemeta:programmingLanguage': [
+ 'php', 'python', 'C'
+ ],
+ 'codemeta:runtimePlatform': 'phpstorm',
+ 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa
+ 'codemeta:version': '1',
+ 'external_identifier': 'hal-01243065',
+ 'id': 'hal-01243065',
+ 'title': 'Composing a Web of Audio Applications'
+ }
+
+ expected_origin_metadata = {
+ 'metadata': expected_metadata,
+ 'provider': {
+ 'metadata': {},
+ 'provider_name': 'hal',
+ 'provider_type': 'deposit_client',
+ 'provider_url': 'https://hal-test.archives-ouvertes.fr/'
+ },
+ 'tool': {
+ 'configuration': {
+ 'sword_version': '2'
+ },
+ 'name': 'swh-deposit',
+ 'version': '0.0.1'
+ }
+ }
+
+ expected_revision = {
+ 'author': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer_date': {
+ 'negative_utc': False,
+ 'offset': 120,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1493820527
+ }
+ },
+ 'date': {
+ 'negative_utc': False,
+ 'offset': 120,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1428332927
+ }
+ },
+ 'message': 'hal: Deposit %s in collection hal' % deposit_id,
+ 'metadata': expected_metadata,
+ 'synthetic': True,
+ 'type': 'tar'
+ }
+
+ expected_meta = {
+ 'branch_name': 'master',
+ 'origin': expected_origin,
+ 'origin_metadata': expected_origin_metadata,
+ 'revision': expected_revision,
+ }
+
+ self.assertEqual(data, expected_meta)
+
+ def test_read_metadata_4(self):
+ """dateCreated/datePublished not provided, revision uses complete_date
+
+ """
+ # add metadata to the deposit without datePublished or dateCreated
+ codemeta_entry_data = b"""
+
+ Composing a Web of Audio Applications
+ hal
+ hal-01243065
+ hal-01243065
+ https://hal-test.archives-ouvertes.fr/hal-01243065
+ test
+ DSP programming
+ this is the description
+ 1
+ phpstorm
+ stable
+ php
+ python
+ C
+
+ GNU General Public License v3.0 only
+
+
+ CeCILL Free Software License Agreement v1.1
+
+
+ HAL
+ hal@ccsd.cnrs.fr
+
+
+ Morane Gruenpeter
+
+""" # noqa
+
+ deposit_id = self.create_deposit_partial_with_data_in_args(
+ codemeta_entry_data)
+
+ # will use the deposit completed date as fallback date
+ deposit = Deposit.objects.get(pk=deposit_id)
+ deposit.complete_date = '2016-04-06'
+ deposit.save()
+
+ url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
+ args=[self.collection.name, deposit_id])
+
+ response = self.client.get(url)
+
+ self.assertEqual(response.status_code,
+ status.HTTP_200_OK)
+ self.assertEqual(response._headers['content-type'][1],
+ 'application/json')
+ data = response.json()
+
+ expected_origin = {
+ 'type': 'deposit',
+ 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065'
+ }
+ expected_metadata = {
+ '@xmlns': 'http://www.w3.org/2005/Atom',
+ '@xmlns:codemeta':
+ 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0',
+ 'author': {
+ 'email': 'hal@ccsd.cnrs.fr',
+ 'name': 'HAL'
+ },
+ 'client': 'hal',
+ 'codemeta:applicationCategory': 'test',
+ 'codemeta:author': {
+ 'codemeta:name': 'Morane Gruenpeter'
+ },
+ 'codemeta:description': 'this is the description',
+ 'codemeta:developmentStatus': 'stable',
+ 'codemeta:keywords': 'DSP programming',
+ 'codemeta:license': [
+ {
+ 'codemeta:name': 'GNU General Public License v3.0 only'
+ },
+ {
+ 'codemeta:name':
+ 'CeCILL Free Software License Agreement v1.1'
+ }
+ ],
+ 'codemeta:programmingLanguage': [
+ 'php', 'python', 'C'
+ ],
+ 'codemeta:runtimePlatform': 'phpstorm',
+ 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa
+ 'codemeta:version': '1',
+ 'external_identifier': 'hal-01243065',
+ 'id': 'hal-01243065',
+ 'title': 'Composing a Web of Audio Applications'
+ }
+
+ expected_origin_metadata = {
+ 'metadata': expected_metadata,
+ 'provider': {
+ 'metadata': {},
+ 'provider_name': 'hal',
+ 'provider_type': 'deposit_client',
+ 'provider_url': 'https://hal-test.archives-ouvertes.fr/'
+ },
+ 'tool': {
+ 'configuration': {
+ 'sword_version': '2'
+ },
+ 'name': 'swh-deposit',
+ 'version': '0.0.1'
+ }
+ }
+
+ expected_revision = {
+ 'author': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer_date': {
+ 'negative_utc': False,
+ 'offset': 0,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1459900800
+ }
+ },
+ 'date': {
+ 'negative_utc': False,
+ 'offset': 0,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1459900800
+ }
+ },
+ 'message': 'hal: Deposit %s in collection hal' % deposit_id,
+ 'metadata': expected_metadata,
+ 'synthetic': True,
+ 'type': 'tar'
+ }
+
+ expected_meta = {
+ 'branch_name': 'master',
+ 'origin': expected_origin,
+ 'origin_metadata': expected_origin_metadata,
+ 'revision': expected_revision,
+ }
+
+ self.assertEqual(data, expected_meta)
+
+ def test_read_metadata_5(self):
+ """dateCreated/datePublished provided, revision uses author/committer
+ date
+
+ If multiple dateCreated provided, the first occurrence (of
+ dateCreated) is selected. If multiple datePublished provided,
+ the first occurrence (of datePublished) is selected.
+
+ """
+ # add metadata to the deposit with multiple datePublished and dateCreated
+ codemeta_entry_data = b"""
+
+ Composing a Web of Audio Applications
+ hal
+ hal-01243065
+ hal-01243065
+ https://hal-test.archives-ouvertes.fr/hal-01243065
+ test
+ DSP programming,Web
+ 2015-04-06T17:08:47+02:00
+ 2016-04-06T17:08:47+02:00
+ this is the description
+ 1
+ phpstorm
+ stable
+ php
+ python
+ C
+ 2017-05-03T16:08:47+02:00
+ 2018-05-03T16:08:47+02:00
+
+ GNU General Public License v3.0 only
+
+
+ CeCILL Free Software License Agreement v1.1
+
+
+ HAL
+ hal@ccsd.cnrs.fr
+
+
+ Morane Gruenpeter
+
+""" # noqa
+
+ deposit_id = self.create_deposit_partial_with_data_in_args(
+ codemeta_entry_data)
+
+ url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
+ args=[self.collection.name, deposit_id])
+
+ response = self.client.get(url)
+
+ self.assertEqual(response.status_code,
+ status.HTTP_200_OK)
+ self.assertEqual(response._headers['content-type'][1],
+ 'application/json')
+ data = response.json()
+
+ expected_origin = {
+ 'type': 'deposit',
+ 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065'
+ }
+ expected_metadata = {
+ '@xmlns': 'http://www.w3.org/2005/Atom',
+ '@xmlns:codemeta':
+ 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0',
+ 'author': {
+ 'email': 'hal@ccsd.cnrs.fr',
+ 'name': 'HAL'
+ },
+ 'client': 'hal',
+ 'codemeta:applicationCategory': 'test',
+ 'codemeta:author': {
+ 'codemeta:name': 'Morane Gruenpeter'
+ },
+ 'codemeta:dateCreated': [
+ '2015-04-06T17:08:47+02:00',
+ '2016-04-06T17:08:47+02:00',
+ ],
+ 'codemeta:datePublished': [
+ '2017-05-03T16:08:47+02:00',
+ '2018-05-03T16:08:47+02:00',
+ ],
+ 'codemeta:description': 'this is the description',
+ 'codemeta:developmentStatus': 'stable',
+ 'codemeta:keywords': 'DSP programming,Web',
+ 'codemeta:license': [
+ {
+ 'codemeta:name': 'GNU General Public License v3.0 only'
+ },
+ {
+ 'codemeta:name':
+ 'CeCILL Free Software License Agreement v1.1'
+ }
+ ],
+ 'codemeta:programmingLanguage': [
+ 'php', 'python', 'C'
+ ],
+ 'codemeta:runtimePlatform': 'phpstorm',
+ 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa
+ 'codemeta:version': '1',
+ 'external_identifier': 'hal-01243065',
+ 'id': 'hal-01243065',
+ 'title': 'Composing a Web of Audio Applications'
+ }
+
+ expected_origin_metadata = {
+ 'metadata': expected_metadata,
+ 'provider': {
+ 'metadata': {},
+ 'provider_name': 'hal',
+ 'provider_type': 'deposit_client',
+ 'provider_url': 'https://hal-test.archives-ouvertes.fr/'
+ },
+ 'tool': {
+ 'configuration': {
+ 'sword_version': '2'
+ },
+ 'name': 'swh-deposit',
+ 'version': '0.0.1'
+ }
+ }
+
+ expected_revision = {
+ 'author': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer_date': {
+ 'negative_utc': False,
+ 'offset': 120,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1493820527
+ }
+ },
+ 'date': {
+ 'negative_utc': False,
+ 'offset': 120,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1428332927
+ }
+ },
+ 'message': 'hal: Deposit %s in collection hal' % deposit_id,
+ 'metadata': expected_metadata,
+ 'synthetic': True,
+ 'type': 'tar'
+ }
+
+ expected_meta = {
+ 'branch_name': 'master',
+ 'origin': expected_origin,
+ 'origin_metadata': expected_origin_metadata,
+ 'revision': expected_revision,
+ }
+
+ self.assertEqual(data, expected_meta)
+
def test_access_to_nonexisting_deposit_returns_404_response(self):
"""Read unknown collection should return a 404 response
diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py
--- a/swh/deposit/tests/common.py
+++ b/swh/deposit/tests/common.py
@@ -346,6 +346,7 @@
another one
no one
+ 2017-10-07T15:17:08Z
"""
self.atom_entry_data2 = b"""
diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py
--- a/swh/deposit/tests/loader/test_loader.py
+++ b/swh/deposit/tests/loader/test_loader.py
@@ -59,12 +59,10 @@
# create the extraction dir used by the loader
os.makedirs(TEST_LOADER_CONFIG['extraction_dir'], exist_ok=True)
- # 1. create a deposit with archive and metadata
- self.deposit_id = self.create_simple_binary_deposit()
- # 2. Sets a basic client which accesses the test data
+ # Sets a basic client which accesses the test data
loader_client = SWHDepositTestClient(self.client,
config=CLIENT_TEST_CONFIG)
- # 3. setup loader with that client
+ # Setup loader with that client
self.loader = loader.DepositLoader(client=loader_client)
self.storage = self.loader.storage
@@ -77,7 +75,11 @@
"""Load a deposit which is ready
"""
- args = [self.collection.name, self.deposit_id]
+ # create a deposit with archive and metadata
+ deposit_id = self.create_simple_binary_deposit()
+ self.update_binary_deposit(deposit_id, status_partial=False)
+
+ args = [self.collection.name, deposit_id]
archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
@@ -100,9 +102,9 @@
"""Load a deposit with metadata, test metadata integrity
"""
- self.deposit_metadata_id = self.add_metadata_to_deposit(
- self.deposit_id)
- args = [self.collection.name, self.deposit_metadata_id]
+ deposit_id = self.create_simple_binary_deposit()
+ self.add_metadata_to_deposit(deposit_id, status_partial=False)
+ args = [self.collection.name, deposit_id]
archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
@@ -157,7 +159,7 @@
self.assertOriginMetadataContains('deposit', origin_url,
expected_origin_metadata)
- deposit = Deposit.objects.get(pk=self.deposit_id)
+ deposit = Deposit.objects.get(pk=deposit_id)
self.assertRegex(deposit.swh_id, r'^swh:1:dir:.*')
self.assertEqual(deposit.swh_id_context, '%s;origin=%s' % (