diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -9,6 +9,7 @@ import tempfile from contextlib import contextmanager +from dateutil import parser from django.http import FileResponse from rest_framework import status @@ -136,6 +137,59 @@ if client_domain in metadata[field]: return metadata[field] + def _prepare_date(self, date): + """Prepare date fields as normalized swh date + + If date is a list, elect arbitrarily the first element of that + list + + If date is (then) a string, parse it through + dateutil.parser.parse to extract a datetime. + + Then normalize it through + swh.model.identifiers.normalize_timestamp. + + Returns + The swh date object + + """ + if isinstance(date, list): + date = date[0] + if isinstance(date, str): + date = parser.parse(date) + + return identifiers.normalize_timestamp(date) + + def _normalize_dates(self, deposit, metadata): + """Normalize the date to use as a tuple of author date, committer date + from the incoming metadata. + + Args: + deposit (Deposit): Deposit model representation + metadata (Dict): Metadata dict representation + + Returns: + Tuple of author date, committer date. Those dates are + swh normalized. + + """ + commit_date = metadata.get('codemeta:datePublished') + author_date = metadata.get('codemeta:dateCreated') + + if author_date and commit_date: + pass + elif commit_date: + author_date = commit_date + elif author_date: + commit_date = author_date + else: + author_date = deposit.complete_date + commit_date = deposit.complete_date + return ( + self._prepare_date(author_date), + self._prepare_date(commit_date) + ) + def metadata_read(self, deposit): """Read and aggregate multiple data on deposit into one unified data dictionary. @@ -169,12 +223,13 @@ revision_type = 'tar' revision_msg = '%s: Deposit %s in collection %s' % ( fullname, deposit.id, deposit.collection.name) - complete_date = identifiers.normalize_timestamp(deposit.complete_date) + + author_date, commit_date = self._normalize_dates(deposit, metadata) data['revision'] = { 'synthetic': True, - 'date': complete_date, - 'committer_date': complete_date, + 'date': author_date, + 'committer_date': commit_date, 'author': author_committer, 'committer': author_committer, 'type': revision_type, diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py --- a/swh/deposit/tests/api/test_deposit_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_read_metadata.py @@ -50,6 +50,7 @@ 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], + 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' @@ -70,15 +71,30 @@ }, 'revision': { 'synthetic': True, - 'committer_date': None, + 'committer_date': { + 'timestamp': { + 'seconds': 1507389428, + 'microseconds': 0 + }, + 'offset': 0, + 'negative_utc': False + }, 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'author': SWH_PERSON, 'committer': SWH_PERSON, - 'date': None, + 'date': { + 'timestamp': { + 'seconds': 1507389428, + 'microseconds': 0 + }, + 'offset': 0, + 'negative_utc': False + }, 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], 'external_identifier': 'some-external-id', + 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' }, @@ -135,6 +151,7 @@ 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], + 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' @@ -155,8 +172,22 @@ }, 'revision': { 'synthetic': True, - 'date': None, - 'committer_date': None, + 'date': { + 'timestamp': { + 'seconds': 1507389428, + 'microseconds': 0 + }, + 'offset': 0, + 'negative_utc': False + }, + 'committer_date': { + 'timestamp': { + 'seconds': 1507389428, + 'microseconds': 0 + }, + 'offset': 0, + 'negative_utc': False + }, 'author': SWH_PERSON, 'committer': SWH_PERSON, 'type': 'tar', @@ -164,6 +195,7 @@ 'metadata': { '@xmlns': ['http://www.w3.org/2005/Atom'], 'author': ['some awesome author', 'another one', 'no one'], + 'codemeta:dateCreated': '2017-10-07T15:17:08Z', 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' @@ -175,6 +207,482 @@ self.assertEqual(data, expected_meta) + def test_read_metadata_3(self): + """dateCreated/datePublished provided, revision uses author/committer date + + """ + # add metadata to the deposit with datePublished and dateCreated + codemeta_entry_data = b""" + + Composing a Web of Audio Applications + hal + hal-01243065 + hal-01243065 + https://hal-test.archives-ouvertes.fr/hal-01243065 + test + DSP programming,Web + 2015-04-06T17:08:47+02:00 + this is the description + 1 + phpstorm + stable + php + python + C + 2017-05-03T16:08:47+02:00 + + GNU General Public License v3.0 only + + + CeCILL Free Software License Agreement v1.1 + + + HAL + hal@ccsd.cnrs.fr + + + Morane Gruenpeter + +""" # noqa + + deposit_id = self.create_deposit_partial_with_data_in_args( + codemeta_entry_data) + + url = reverse(PRIVATE_GET_DEPOSIT_METADATA, + args=[self.collection.name, deposit_id]) + + response = self.client.get(url) + + self.assertEqual(response.status_code, + status.HTTP_200_OK) + self.assertEqual(response._headers['content-type'][1], + 'application/json') + data = response.json() + + expected_origin = { + 'type': 'deposit', + 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065' + } + expected_metadata = { + '@xmlns': 'http://www.w3.org/2005/Atom', + '@xmlns:codemeta': + 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', + 'author': { + 'email': 'hal@ccsd.cnrs.fr', + 'name': 'HAL' + }, + 'client': 'hal', + 'codemeta:applicationCategory': 'test', + 'codemeta:author': { + 'codemeta:name': 'Morane Gruenpeter' + }, + 'codemeta:dateCreated': '2015-04-06T17:08:47+02:00', + 'codemeta:datePublished': '2017-05-03T16:08:47+02:00', + 'codemeta:description': 'this is the description', + 'codemeta:developmentStatus': 'stable', + 'codemeta:keywords': 'DSP programming,Web', + 'codemeta:license': [ + { + 'codemeta:name': 'GNU General Public License v3.0 only' + }, + { + 'codemeta:name': + 'CeCILL Free Software License Agreement v1.1' + } + ], + 'codemeta:programmingLanguage': [ + 'php', 'python', 'C' + ], + 'codemeta:runtimePlatform': 'phpstorm', + 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa + 'codemeta:version': '1', + 'external_identifier': 'hal-01243065', + 'id': 'hal-01243065', + 'title': 'Composing a Web of Audio Applications' + } + + expected_origin_metadata = { + 'metadata': expected_metadata, + 'provider': { + 'metadata': {}, + 'provider_name': 'hal', + 'provider_type': 'deposit_client', + 'provider_url': 'https://hal-test.archives-ouvertes.fr/' + }, + 'tool': { + 'configuration': { + 'sword_version': '2' + }, + 'name': 'swh-deposit', + 'version': '0.0.1' + } + } + + expected_revision = { + 'author': { + 'email': 'robot@softwareheritage.org', + 'fullname': 'Software Heritage', + 'name': 'Software Heritage' + }, + 'committer': { + 'email': 'robot@softwareheritage.org', + 'fullname': 'Software Heritage', + 'name': 'Software Heritage' + }, + 'committer_date': { + 'negative_utc': False, + 'offset': 120, + 'timestamp': { + 'microseconds': 0, + 'seconds': 1493820527 + } + }, + 'date': { + 'negative_utc': False, + 'offset': 120, + 'timestamp': { + 'microseconds': 0, + 'seconds': 1428332927 + } + }, + 'message': 'hal: Deposit %s in collection hal' % deposit_id, + 'metadata': expected_metadata, + 'synthetic': True, + 'type': 'tar' + } + + expected_meta = { + 'branch_name': 'master', + 'origin': expected_origin, + 'origin_metadata': expected_origin_metadata, + 'revision': expected_revision, + } + + self.assertEqual(data, expected_meta) + + def test_read_metadata_4(self): + """dateCreated/datePublished not provided, revision uses complete_date + + """ + # add metadata to the deposit with datePublished and dateCreated + codemeta_entry_data = b""" + + Composing a Web of Audio Applications + hal + hal-01243065 + hal-01243065 + https://hal-test.archives-ouvertes.fr/hal-01243065 + test + DSP programming + this is the description + 1 + phpstorm + stable + php + python + C + + GNU General Public License v3.0 only + + + CeCILL Free Software License Agreement v1.1 + + + HAL + hal@ccsd.cnrs.fr + + + Morane Gruenpeter + +""" # noqa + + deposit_id = self.create_deposit_partial_with_data_in_args( + codemeta_entry_data) + + # will use the deposit completed date as fallback date + deposit = Deposit.objects.get(pk=deposit_id) + deposit.complete_date = '2016-04-06' + deposit.save() + + url = reverse(PRIVATE_GET_DEPOSIT_METADATA, + args=[self.collection.name, deposit_id]) + + response = self.client.get(url) + + self.assertEqual(response.status_code, + status.HTTP_200_OK) + self.assertEqual(response._headers['content-type'][1], + 'application/json') + data = response.json() + + expected_origin = { + 'type': 'deposit', + 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065' + } + expected_metadata = { + '@xmlns': 'http://www.w3.org/2005/Atom', + '@xmlns:codemeta': + 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', + 'author': { + 'email': 'hal@ccsd.cnrs.fr', + 'name': 'HAL' + }, + 'client': 'hal', + 'codemeta:applicationCategory': 'test', + 'codemeta:author': { + 'codemeta:name': 'Morane Gruenpeter' + }, + 'codemeta:description': 'this is the description', + 'codemeta:developmentStatus': 'stable', + 'codemeta:keywords': 'DSP programming', + 'codemeta:license': [ + { + 'codemeta:name': 'GNU General Public License v3.0 only' + }, + { + 'codemeta:name': + 'CeCILL Free Software License Agreement v1.1' + } + ], + 'codemeta:programmingLanguage': [ + 'php', 'python', 'C' + ], + 'codemeta:runtimePlatform': 'phpstorm', + 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa + 'codemeta:version': '1', + 'external_identifier': 'hal-01243065', + 'id': 'hal-01243065', + 'title': 'Composing a Web of Audio Applications' + } + + expected_origin_metadata = { + 'metadata': expected_metadata, + 'provider': { + 'metadata': {}, + 'provider_name': 'hal', + 'provider_type': 'deposit_client', + 'provider_url': 'https://hal-test.archives-ouvertes.fr/' + }, + 'tool': { + 'configuration': { + 'sword_version': '2' + }, + 'name': 'swh-deposit', + 'version': '0.0.1' + } + } + + expected_revision = { + 'author': { + 'email': 'robot@softwareheritage.org', + 'fullname': 'Software Heritage', + 'name': 'Software Heritage' + }, + 'committer': { + 'email': 'robot@softwareheritage.org', + 'fullname': 'Software Heritage', + 'name': 'Software Heritage' + }, + 'committer_date': { + 'negative_utc': False, + 'offset': 0, + 'timestamp': { + 'microseconds': 0, + 'seconds': 1459900800 + } + }, + 'date': { + 'negative_utc': False, + 'offset': 0, + 'timestamp': { + 'microseconds': 0, + 'seconds': 1459900800 + } + }, + 'message': 'hal: Deposit %s in collection hal' % deposit_id, + 'metadata': expected_metadata, + 'synthetic': True, + 'type': 'tar' + } + + expected_meta = { + 'branch_name': 'master', + 'origin': expected_origin, + 'origin_metadata': expected_origin_metadata, + 'revision': expected_revision, + } + + self.assertEqual(data, expected_meta) + + def test_read_metadata_5(self): + """dateCreated/datePublished provided, revision uses author/committer + date + + If multiple dateCreated provided, the first occurrence (of + dateCreated) is selected. If multiple datePublished provided, + the first occurrence (of datePublished) is selected. + + """ + # add metadata to the deposit with datePublished and dateCreated + codemeta_entry_data = b""" + + Composing a Web of Audio Applications + hal + hal-01243065 + hal-01243065 + https://hal-test.archives-ouvertes.fr/hal-01243065 + test + DSP programming,Web + 2015-04-06T17:08:47+02:00 + 2016-04-06T17:08:47+02:00 + this is the description + 1 + phpstorm + stable + php + python + C + 2017-05-03T16:08:47+02:00 + 2018-05-03T16:08:47+02:00 + + GNU General Public License v3.0 only + + + CeCILL Free Software License Agreement v1.1 + + + HAL + hal@ccsd.cnrs.fr + + + Morane Gruenpeter + +""" # noqa + + deposit_id = self.create_deposit_partial_with_data_in_args( + codemeta_entry_data) + + url = reverse(PRIVATE_GET_DEPOSIT_METADATA, + args=[self.collection.name, deposit_id]) + + response = self.client.get(url) + + self.assertEqual(response.status_code, + status.HTTP_200_OK) + self.assertEqual(response._headers['content-type'][1], + 'application/json') + data = response.json() + + expected_origin = { + 'type': 'deposit', + 'url': 'https://hal-test.archives-ouvertes.fr/hal-01243065' + } + expected_metadata = { + '@xmlns': 'http://www.w3.org/2005/Atom', + '@xmlns:codemeta': + 'https://doi.org/10.5063/SCHEMA/CODEMETA-2.0', + 'author': { + 'email': 'hal@ccsd.cnrs.fr', + 'name': 'HAL' + }, + 'client': 'hal', + 'codemeta:applicationCategory': 'test', + 'codemeta:author': { + 'codemeta:name': 'Morane Gruenpeter' + }, + 'codemeta:dateCreated': [ + '2015-04-06T17:08:47+02:00', + '2016-04-06T17:08:47+02:00', + ], + 'codemeta:datePublished': [ + '2017-05-03T16:08:47+02:00', + '2018-05-03T16:08:47+02:00', + ], + 'codemeta:description': 'this is the description', + 'codemeta:developmentStatus': 'stable', + 'codemeta:keywords': 'DSP programming,Web', + 'codemeta:license': [ + { + 'codemeta:name': 'GNU General Public License v3.0 only' + }, + { + 'codemeta:name': + 'CeCILL Free Software License Agreement v1.1' + } + ], + 'codemeta:programmingLanguage': [ + 'php', 'python', 'C' + ], + 'codemeta:runtimePlatform': 'phpstorm', + 'codemeta:url': 'https://hal-test.archives-ouvertes.fr/hal-01243065', # noqa + 'codemeta:version': '1', + 'external_identifier': 'hal-01243065', + 'id': 'hal-01243065', + 'title': 'Composing a Web of Audio Applications' + } + + expected_origin_metadata = { + 'metadata': expected_metadata, + 'provider': { + 'metadata': {}, + 'provider_name': 'hal', + 'provider_type': 'deposit_client', + 'provider_url': 'https://hal-test.archives-ouvertes.fr/' + }, + 'tool': { + 'configuration': { + 'sword_version': '2' + }, + 'name': 'swh-deposit', + 'version': '0.0.1' + } + } + + expected_revision = { + 'author': { + 'email': 'robot@softwareheritage.org', + 'fullname': 'Software Heritage', + 'name': 'Software Heritage' + }, + 'committer': { + 'email': 'robot@softwareheritage.org', + 'fullname': 'Software Heritage', + 'name': 'Software Heritage' + }, + 'committer_date': { + 'negative_utc': False, + 'offset': 120, + 'timestamp': { + 'microseconds': 0, + 'seconds': 1493820527 + } + }, + 'date': { + 'negative_utc': False, + 'offset': 120, + 'timestamp': { + 'microseconds': 0, + 'seconds': 1428332927 + } + }, + 'message': 'hal: Deposit %s in collection hal' % deposit_id, + 'metadata': expected_metadata, + 'synthetic': True, + 'type': 'tar' + } + + expected_meta = { + 'branch_name': 'master', + 'origin': expected_origin, + 'origin_metadata': expected_origin_metadata, + 'revision': expected_revision, + } + + self.assertEqual(data, expected_meta) + def test_access_to_nonexisting_deposit_returns_404_response(self): """Read unknown collection should return a 404 response diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -346,6 +346,7 @@ another one no one + 2017-10-07T15:17:08Z """ self.atom_entry_data2 = b""" diff --git a/swh/deposit/tests/loader/test_loader.py b/swh/deposit/tests/loader/test_loader.py --- a/swh/deposit/tests/loader/test_loader.py +++ b/swh/deposit/tests/loader/test_loader.py @@ -59,12 +59,10 @@ # create the extraction dir used by the loader os.makedirs(TEST_LOADER_CONFIG['extraction_dir'], exist_ok=True) - # 1. create a deposit with archive and metadata - self.deposit_id = self.create_simple_binary_deposit() - # 2. Sets a basic client which accesses the test data + # Sets a basic client which accesses the test data loader_client = SWHDepositTestClient(self.client, config=CLIENT_TEST_CONFIG) - # 3. setup loader with that client + # Setup loader with that client self.loader = loader.DepositLoader(client=loader_client) self.storage = self.loader.storage @@ -77,7 +75,11 @@ """Load a deposit which is ready """ - args = [self.collection.name, self.deposit_id] + # create a deposit with archive and metadata + deposit_id = self.create_simple_binary_deposit() + self.update_binary_deposit(deposit_id, status_partial=False) + + args = [self.collection.name, deposit_id] archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) @@ -100,9 +102,9 @@ """Load a deposit with metadata, test metadata integrity """ - self.deposit_metadata_id = self.add_metadata_to_deposit( - self.deposit_id) - args = [self.collection.name, self.deposit_metadata_id] + deposit_id = self.create_simple_binary_deposit() + self.add_metadata_to_deposit(deposit_id, status_partial=False) + args = [self.collection.name, deposit_id] archive_url = reverse(PRIVATE_GET_RAW_CONTENT, args=args) deposit_meta_url = reverse(PRIVATE_GET_DEPOSIT_METADATA, args=args) @@ -157,7 +159,7 @@ self.assertOriginMetadataContains('deposit', origin_url, expected_origin_metadata) - deposit = Deposit.objects.get(pk=self.deposit_id) + deposit = Deposit.objects.get(pk=deposit_id) self.assertRegex(deposit.swh_id, r'^swh:1:dir:.*') self.assertEqual(deposit.swh_id_context, '%s;origin=%s' % (