diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
vcversioner
click
xmltodict
+iso8601
diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 The Software Heritage developers
+# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -14,6 +14,7 @@
from swh.core import tarball
from swh.model import identifiers
+from swh.deposit.utils import normalize_date
from . import DepositReadMixin
from ...config import SWH_PERSON, ARCHIVE_TYPE
@@ -136,6 +137,36 @@
if client_domain in metadata[field]:
return metadata[field]
+ def _normalize_dates(self, deposit, metadata):
+ """Compute the (author date, committer date) tuple from the incoming
+ metadata, falling back on the deposit's complete_date.
+
+ Args:
+ deposit (Deposit): Deposit model representation
+ metadata (Dict): Metadata dict representation
+
+ Returns:
+ Tuple of author date, committer date. Those dates are
+ swh normalized.
+
+ """
+ commit_date = metadata.get('codemeta:datePublished')
+ author_date = metadata.get('codemeta:dateCreated')
+
+ if author_date and commit_date:
+ pass
+ elif commit_date:
+ author_date = commit_date
+ elif author_date:
+ commit_date = author_date
+ else:
+ author_date = deposit.complete_date
+ commit_date = deposit.complete_date
+ return (
+ normalize_date(author_date),
+ normalize_date(commit_date)
+ )
+
def metadata_read(self, deposit):
"""Read and aggregate multiple data on deposit into one unified data
dictionary.
@@ -169,12 +200,13 @@
revision_type = 'tar'
revision_msg = '%s: Deposit %s in collection %s' % (
fullname, deposit.id, deposit.collection.name)
- complete_date = identifiers.normalize_timestamp(deposit.complete_date)
+
+ author_date, commit_date = self._normalize_dates(deposit, metadata)
data['revision'] = {
'synthetic': True,
- 'date': complete_date,
- 'committer_date': complete_date,
+ 'date': author_date,
+ 'committer_date': commit_date,
'author': author_committer,
'committer': author_committer,
'type': revision_type,
diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py
--- a/swh/deposit/tests/api/test_deposit_read_metadata.py
+++ b/swh/deposit/tests/api/test_deposit_read_metadata.py
@@ -23,6 +23,43 @@
"""Deposit access to read metadata information on deposit.
"""
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ self.template_metadata = """
+
+ Composing a Web of Audio Applications
+ hal
+ hal-01243065
+ hal-01243065
+ https://hal-test.archives-ouvertes.fr/hal-01243065
+ test
+ DSP programming
+ this is the description
+ 1
+ phpstorm
+ stable
+ php
+ python
+ C
+
+ GNU General Public License v3.0 only
+
+
+ CeCILL Free Software License Agreement v1.1
+
+
+ HAL
+ hal@ccsd.cnrs.fr
+
+
+ Morane Gruenpeter
+
+%s
+"""
+
def test_read_metadata(self):
"""Private metadata read api to existing deposit should return metadata
@@ -50,6 +87,7 @@
'metadata': {
'@xmlns': ['http://www.w3.org/2005/Atom'],
'author': ['some awesome author', 'another one', 'no one'],
+ 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
'external_identifier': 'some-external-id',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
@@ -70,15 +108,30 @@
},
'revision': {
'synthetic': True,
- 'committer_date': None,
+ 'committer_date': {
+ 'timestamp': {
+ 'seconds': 1507389428,
+ 'microseconds': 0
+ },
+ 'offset': 0,
+ 'negative_utc': False
+ },
'message': 'hal: Deposit %s in collection hal' % deposit_id,
'author': SWH_PERSON,
'committer': SWH_PERSON,
- 'date': None,
+ 'date': {
+ 'timestamp': {
+ 'seconds': 1507389428,
+ 'microseconds': 0
+ },
+ 'offset': 0,
+ 'negative_utc': False
+ },
'metadata': {
'@xmlns': ['http://www.w3.org/2005/Atom'],
'author': ['some awesome author', 'another one', 'no one'],
'external_identifier': 'some-external-id',
+ 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
},
@@ -135,6 +188,7 @@
'metadata': {
'@xmlns': ['http://www.w3.org/2005/Atom'],
'author': ['some awesome author', 'another one', 'no one'],
+ 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
'external_identifier': 'some-external-id',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
@@ -155,8 +209,22 @@
},
'revision': {
'synthetic': True,
- 'date': None,
- 'committer_date': None,
+ 'date': {
+ 'timestamp': {
+ 'seconds': 1507389428,
+ 'microseconds': 0
+ },
+ 'offset': 0,
+ 'negative_utc': False
+ },
+ 'committer_date': {
+ 'timestamp': {
+ 'seconds': 1507389428,
+ 'microseconds': 0
+ },
+ 'offset': 0,
+ 'negative_utc': False
+ },
'author': SWH_PERSON,
'committer': SWH_PERSON,
'type': 'tar',
@@ -164,6 +232,7 @@
'metadata': {
'@xmlns': ['http://www.w3.org/2005/Atom'],
'author': ['some awesome author', 'another one', 'no one'],
+ 'codemeta:dateCreated': '2017-10-07T15:17:08Z',
'external_identifier': 'some-external-id',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
@@ -175,6 +244,393 @@
self.assertEqual(data, expected_meta)
+ def test_read_metadata_3(self):
+ """date(Created|Published) provided, uses author/committer date
+
+ """
+ # add metadata to the deposit with datePublished and dateCreated
+ codemeta_entry_data = self.template_metadata % """
+ 2015-04-06T17:08:47+02:00
+ 2017-05-03T16:08:47+02:00
+"""
+
+ deposit_id = self.create_deposit_partial_with_data_in_args(
+ codemeta_entry_data)
+
+ url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
+ args=[self.collection.name, deposit_id])
+
+ response = self.client.get(url)
+
+ self.assertEqual(response.status_code,
+ status.HTTP_200_OK)
+ self.assertEqual(response._headers['content-type'][1],
+ 'application/json')
+ data = response.json()
+
+ expected_origin = {
+ 'type': 'deposit',
+ 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065'
+ }
+ expected_metadata = {
+ '@xmlns': 'http://www.w3.org/2005/Atom',
+ '@xmlns:codemeta':
+ 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0',
+ 'author': {
+ 'email': 'hal@ccsd.cnrs.fr',
+ 'name': 'HAL'
+ },
+ 'client': 'hal',
+ 'codemeta:applicationCategory': 'test',
+ 'codemeta:author': {
+ 'codemeta:name': 'Morane Gruenpeter'
+ },
+ 'codemeta:dateCreated': '2015-04-06T17:08:47+02:00',
+ 'codemeta:datePublished': '2017-05-03T16:08:47+02:00',
+ 'codemeta:description': 'this is the description',
+ 'codemeta:developmentStatus': 'stable',
+ 'codemeta:keywords': 'DSP programming',
+ 'codemeta:license': [
+ {
+ 'codemeta:name': 'GNU General Public License v3.0 only'
+ },
+ {
+ 'codemeta:name':
+ 'CeCILL Free Software License Agreement v1.1'
+ }
+ ],
+ 'codemeta:programmingLanguage': [
+ 'php', 'python', 'C'
+ ],
+ 'codemeta:runtimePlatform': 'phpstorm',
+ 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa
+ 'codemeta:version': '1',
+ 'external_identifier': 'hal-01243065',
+ 'id': 'hal-01243065',
+ 'title': 'Composing a Web of Audio Applications'
+ }
+
+ expected_origin_metadata = {
+ 'metadata': expected_metadata,
+ 'provider': {
+ 'metadata': {},
+ 'provider_name': 'hal',
+ 'provider_type': 'deposit_client',
+ 'provider_url': 'https://hal-test.archives-ouvertes.fr/'
+ },
+ 'tool': {
+ 'configuration': {
+ 'sword_version': '2'
+ },
+ 'name': 'swh-deposit',
+ 'version': '0.0.1'
+ }
+ }
+
+ expected_revision = {
+ 'author': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer_date': {
+ 'negative_utc': False,
+ 'offset': 120,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1493820527
+ }
+ },
+ 'date': {
+ 'negative_utc': False,
+ 'offset': 120,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1428332927
+ }
+ },
+ 'message': 'hal: Deposit %s in collection hal' % deposit_id,
+ 'metadata': expected_metadata,
+ 'synthetic': True,
+ 'type': 'tar'
+ }
+
+ expected_meta = {
+ 'branch_name': 'master',
+ 'origin': expected_origin,
+ 'origin_metadata': expected_origin_metadata,
+ 'revision': expected_revision,
+ }
+
+ self.assertEqual(data, expected_meta)
+
+ def test_read_metadata_4(self):
+ """dateCreated/datePublished not provided, revision uses complete_date
+
+ """
+ codemeta_entry_data = self.template_metadata % ''
+
+ deposit_id = self.create_deposit_partial_with_data_in_args(
+ codemeta_entry_data)
+
+ # will use the deposit completed date as fallback date
+ deposit = Deposit.objects.get(pk=deposit_id)
+ deposit.complete_date = '2016-04-06'
+ deposit.save()
+
+ url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
+ args=[self.collection.name, deposit_id])
+
+ response = self.client.get(url)
+
+ self.assertEqual(response.status_code,
+ status.HTTP_200_OK)
+ self.assertEqual(response._headers['content-type'][1],
+ 'application/json')
+ data = response.json()
+
+ expected_origin = {
+ 'type': 'deposit',
+ 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065'
+ }
+ expected_metadata = {
+ '@xmlns': 'http://www.w3.org/2005/Atom',
+ '@xmlns:codemeta':
+ 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0',
+ 'author': {
+ 'email': 'hal@ccsd.cnrs.fr',
+ 'name': 'HAL'
+ },
+ 'client': 'hal',
+ 'codemeta:applicationCategory': 'test',
+ 'codemeta:author': {
+ 'codemeta:name': 'Morane Gruenpeter'
+ },
+ 'codemeta:description': 'this is the description',
+ 'codemeta:developmentStatus': 'stable',
+ 'codemeta:keywords': 'DSP programming',
+ 'codemeta:license': [
+ {
+ 'codemeta:name': 'GNU General Public License v3.0 only'
+ },
+ {
+ 'codemeta:name':
+ 'CeCILL Free Software License Agreement v1.1'
+ }
+ ],
+ 'codemeta:programmingLanguage': [
+ 'php', 'python', 'C'
+ ],
+ 'codemeta:runtimePlatform': 'phpstorm',
+ 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa
+ 'codemeta:version': '1',
+ 'external_identifier': 'hal-01243065',
+ 'id': 'hal-01243065',
+ 'title': 'Composing a Web of Audio Applications'
+ }
+
+ expected_origin_metadata = {
+ 'metadata': expected_metadata,
+ 'provider': {
+ 'metadata': {},
+ 'provider_name': 'hal',
+ 'provider_type': 'deposit_client',
+ 'provider_url': 'https://hal-test.archives-ouvertes.fr/'
+ },
+ 'tool': {
+ 'configuration': {
+ 'sword_version': '2'
+ },
+ 'name': 'swh-deposit',
+ 'version': '0.0.1'
+ }
+ }
+
+ expected_revision = {
+ 'author': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer_date': {
+ 'negative_utc': False,
+ 'offset': 0,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1459900800
+ }
+ },
+ 'date': {
+ 'negative_utc': False,
+ 'offset': 0,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1459900800
+ }
+ },
+ 'message': 'hal: Deposit %s in collection hal' % deposit_id,
+ 'metadata': expected_metadata,
+ 'synthetic': True,
+ 'type': 'tar'
+ }
+
+ expected_meta = {
+ 'branch_name': 'master',
+ 'origin': expected_origin,
+ 'origin_metadata': expected_origin_metadata,
+ 'revision': expected_revision,
+ }
+
+ self.assertEqual(data, expected_meta)
+
+ def test_read_metadata_5(self):
+ """dateCreated/datePublished provided, revision uses author/committer
+ date
+
+ If multiple dateCreated provided, the first occurrence (of
+ dateCreated) is selected. If multiple datePublished provided,
+ the first occurrence (of datePublished) is selected.
+
+ """
+ # add metadata to the deposit with multiple datePublished/dateCreated
+ codemeta_entry_data = self.template_metadata % """
+ 2015-04-06T17:08:47+02:00
+ 2017-05-03T16:08:47+02:00
+ 2016-04-06T17:08:47+02:00
+ 2018-05-03T16:08:47+02:00
+"""
+
+ deposit_id = self.create_deposit_partial_with_data_in_args(
+ codemeta_entry_data)
+
+ url = reverse(PRIVATE_GET_DEPOSIT_METADATA,
+ args=[self.collection.name, deposit_id])
+
+ response = self.client.get(url)
+
+ self.assertEqual(response.status_code,
+ status.HTTP_200_OK)
+ self.assertEqual(response._headers['content-type'][1],
+ 'application/json')
+ data = response.json()
+
+ expected_origin = {
+ 'type': 'deposit',
+ 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065'
+ }
+ expected_metadata = {
+ '@xmlns': 'http://www.w3.org/2005/Atom',
+ '@xmlns:codemeta':
+ 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0',
+ 'author': {
+ 'email': 'hal@ccsd.cnrs.fr',
+ 'name': 'HAL'
+ },
+ 'client': 'hal',
+ 'codemeta:applicationCategory': 'test',
+ 'codemeta:author': {
+ 'codemeta:name': 'Morane Gruenpeter'
+ },
+ 'codemeta:dateCreated': [
+ '2015-04-06T17:08:47+02:00',
+ '2016-04-06T17:08:47+02:00',
+ ],
+ 'codemeta:datePublished': [
+ '2017-05-03T16:08:47+02:00',
+ '2018-05-03T16:08:47+02:00',
+ ],
+ 'codemeta:description': 'this is the description',
+ 'codemeta:developmentStatus': 'stable',
+ 'codemeta:keywords': 'DSP programming',
+ 'codemeta:license': [
+ {
+ 'codemeta:name': 'GNU General Public License v3.0 only'
+ },
+ {
+ 'codemeta:name':
+ 'CeCILL Free Software License Agreement v1.1'
+ }
+ ],
+ 'codemeta:programmingLanguage': [
+ 'php', 'python', 'C'
+ ],
+ 'codemeta:runtimePlatform': 'phpstorm',
+ 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa
+ 'codemeta:version': '1',
+ 'external_identifier': 'hal-01243065',
+ 'id': 'hal-01243065',
+ 'title': 'Composing a Web of Audio Applications'
+ }
+
+ expected_origin_metadata = {
+ 'metadata': expected_metadata,
+ 'provider': {
+ 'metadata': {},
+ 'provider_name': 'hal',
+ 'provider_type': 'deposit_client',
+ 'provider_url': 'https://hal-test.archives-ouvertes.fr/'
+ },
+ 'tool': {
+ 'configuration': {
+ 'sword_version': '2'
+ },
+ 'name': 'swh-deposit',
+ 'version': '0.0.1'
+ }
+ }
+
+ expected_revision = {
+ 'author': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer': {
+ 'email': 'robot@softwareheritage.org',
+ 'fullname': 'Software Heritage',
+ 'name': 'Software Heritage'
+ },
+ 'committer_date': {
+ 'negative_utc': False,
+ 'offset': 120,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1493820527
+ }
+ },
+ 'date': {
+ 'negative_utc': False,
+ 'offset': 120,
+ 'timestamp': {
+ 'microseconds': 0,
+ 'seconds': 1428332927
+ }
+ },
+ 'message': 'hal: Deposit %s in collection hal' % deposit_id,
+ 'metadata': expected_metadata,
+ 'synthetic': True,
+ 'type': 'tar'
+ }
+
+ expected_meta = {
+ 'branch_name': 'master',
+ 'origin': expected_origin,
+ 'origin_metadata': expected_origin_metadata,
+ 'revision': expected_revision,
+ }
+
+ self.assertEqual(data, expected_meta)
+
def test_access_to_nonexisting_deposit_returns_404_response(self):
"""Read unknown collection should return a 404 response
diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py
--- a/swh/deposit/tests/common.py
+++ b/swh/deposit/tests/common.py
@@ -346,6 +346,7 @@
another one
no one
+ 2017-10-07T15:17:08Z
"""
self.atom_entry_data2 = b"""
@@ -488,6 +489,9 @@
deposit id
"""
+ if isinstance(data, str):
+ data = data.encode('utf-8')
+
response = self.client.post(
reverse(COL_IRI, args=[self.collection.name]),
content_type='application/atom+xml;type=entry',
diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py
--- a/swh/deposit/tests/loader/test_loader.py
+++ b/swh/deposit/tests/loader/test_loader.py
@@ -59,12 +59,10 @@
# create the extraction dir used by the loader
os.makedirs(TEST_LOADER_CONFIG['extraction_dir'], exist_ok=True)
- # 1. create a deposit with archive and metadata
- self.deposit_id = self.create_simple_binary_deposit()
- # 2. Sets a basic client which accesses the test data
+ # Sets a basic client which accesses the test data
loader_client = SWHDepositTestClient(self.client,
config=CLIENT_TEST_CONFIG)
- # 3. setup loader with that client
+ # Setup loader with that client
self.loader = loader.DepositLoader(client=loader_client)
self.storage = self.loader.storage
@@ -77,7 +75,11 @@
"""Load a deposit which is ready
"""
- args = [self.collection.name, self.deposit_id]
+ # create a deposit with archive and metadata
+ deposit_id = self.create_simple_binary_deposit()
+ self.update_binary_deposit(deposit_id, status_partial=False)
+
+ args = [self.collection.name, deposit_id]
archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
@@ -100,9 +102,9 @@
"""Load a deposit with metadata, test metadata integrity
"""
- self.deposit_metadata_id = self.add_metadata_to_deposit(
- self.deposit_id)
- args = [self.collection.name, self.deposit_metadata_id]
+ deposit_id = self.create_simple_binary_deposit()
+ self.add_metadata_to_deposit(deposit_id, status_partial=False)
+ args = [self.collection.name, deposit_id]
archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args)
deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args)
@@ -157,7 +159,7 @@
self.assertOriginMetadataContains('deposit', origin_url,
expected_origin_metadata)
- deposit = Deposit.objects.get(pk=self.deposit_id)
+ deposit = Deposit.objects.get(pk=deposit_id)
self.assertRegex(deposit.swh_id, r'^swh:1:dir:.*')
self.assertEqual(deposit.swh_id_context, '%s;origin=%s' % (
diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py
--- a/swh/deposit/tests/test_utils.py
+++ b/swh/deposit/tests/test_utils.py
@@ -1,10 +1,11 @@
-# Copyright (C) 2018 The Software Heritage developers
+# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import unittest
+from unittest.mock import patch
from swh.deposit import utils
@@ -130,3 +131,48 @@
utils.merge(d1)
self.assertEqual(utils.merge(d0), d0)
+
+
+@patch('swh.deposit.utils.normalize_timestamp', side_effect=lambda x: x)
+def test_normalize_date_0(mock_normalize):
+ """When date is a list, choose the first date and normalize it
+
+ Note: We do not test swh.model.identifiers which is already tested
+ in swh.model
+
+ """
+ actual_date = utils.normalize_date(['2017-10-12', 'date1'])
+
+ expected_date = '2017-10-12 00:00:00+00:00'
+
+ assert str(actual_date) == expected_date
+
+
+@patch('swh.deposit.utils.normalize_timestamp', side_effect=lambda x: x)
+def test_normalize_date_1(mock_normalize):
+ """Providing a date in a reasonable format, everything is fine
+
+ Note: We do not test swh.model.identifiers which is already tested
+ in swh.model
+
+ """
+ actual_date = utils.normalize_date('2018-06-11 17:02:02')
+
+ expected_date = '2018-06-11 17:02:02+00:00'
+
+ assert str(actual_date) == expected_date
+
+
+@patch('swh.deposit.utils.normalize_timestamp', side_effect=lambda x: x)
+def test_normalize_date_doing_irrelevant_stuff(mock_normalize):
+ """Providing a date with only the year results in a reasonable date
+
+ Note: We do not test swh.model.identifiers which is already tested
+ in swh.model
+
+ """
+ actual_date = utils.normalize_date('2017')
+
+ expected_date = '2017-01-01 00:00:00+00:00'
+
+ assert str(actual_date) == expected_date
diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py
--- a/swh/deposit/utils.py
+++ b/swh/deposit/utils.py
@@ -1,10 +1,14 @@
-# Copyright (C) 2018 The Software Heritage developers
+# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import iso8601
+
from types import GeneratorType
+from swh.model.identifiers import normalize_timestamp
+
def merge(*dicts):
"""Given an iterator of dicts, merge them losing no information.
@@ -53,3 +57,27 @@
new_val = _extend([existing_val], value)
d[key] = new_val
return d
+
+
+def normalize_date(date):
+ """Normalize date fields as expected by swh workers.
+
+ If date is a list, elect arbitrarily the first element of that
+ list
+
+ If date is (then) a string, parse it through
+ iso8601.parse_date to extract a datetime.
+
+ Then normalize it through
+ swh.model.identifiers.normalize_timestamp.
+
+ Returns:
+ The swh date object
+
+ """
+ if isinstance(date, list):
+ date = date[0]
+ if isinstance(date, str):
+ date = iso8601.parse_date(date)
+
+ return normalize_timestamp(date)