diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -3,6 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from swh.deposit import utils from ...config import METADATA_TYPE from ...models import DepositRequest, Deposit @@ -45,7 +46,6 @@ metadata dict from the deposit. """ - metadata = {} - for dr in self._deposit_requests(deposit, request_type=METADATA_TYPE): - metadata.update(dr.metadata) - return metadata + metadata = (m.metadata for m in self._deposit_requests( + deposit, request_type=METADATA_TYPE)) + return utils.merge(*metadata) diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -82,18 +82,6 @@ if not os.path.exists(self.extraction_dir): os.makedirs(self.extraction_dir) - def retrieve_archives(self, deposit_id): - """Given a deposit identifier, returns its associated archives' path. - - Yields: - path to deposited archives - - """ - deposit_requests = self._deposit_requests( - deposit_id, request_type=ARCHIVE_TYPE) - for deposit_request in deposit_requests: - yield deposit_request.archive.path - def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. 
@@ -107,9 +95,9 @@ Tuple status, stream of content, content-type """ - archive_paths = list(self.retrieve_archives(deposit_id)) - with aggregate_tarballs(self.extraction_dir, - archive_paths) as path: + archive_paths = [r.archive.path for r in self._deposit_requests( + deposit_id, request_type=ARCHIVE_TYPE)] + with aggregate_tarballs(self.extraction_dir, archive_paths) as path: return FileResponse(open(path, 'rb'), status=status.HTTP_200_OK, content_type='application/octet-stream') diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py --- a/swh/deposit/tests/api/test_deposit_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_read_metadata.py @@ -51,8 +51,8 @@ }, 'origin_metadata': { 'metadata': { - '@xmlns': 'http://www.w3.org/2005/Atom', - 'author': 'some awesome author', + '@xmlns': ['http://www.w3.org/2005/Atom'], + 'author': ['some awesome author', 'another one', 'no one'], 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' @@ -79,8 +79,8 @@ 'committer': SWH_PERSON, 'date': None, 'metadata': { - '@xmlns': 'http://www.w3.org/2005/Atom', - 'author': 'some awesome author', + '@xmlns': ['http://www.w3.org/2005/Atom'], + 'author': ['some awesome author', 'another one', 'no one'], 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' @@ -137,8 +137,8 @@ }, 'origin_metadata': { 'metadata': { - '@xmlns': 'http://www.w3.org/2005/Atom', - 'author': 'some awesome author', + '@xmlns': ['http://www.w3.org/2005/Atom'], + 'author': ['some awesome author', 'another one', 'no one'], 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' @@ -166,8 +166,8 @@ 'type': 'tar', 'message': 'hal: Deposit %s in collection hal' % deposit_id, 'metadata': { - '@xmlns': 'http://www.w3.org/2005/Atom', - 'author': 'some awesome author', + '@xmlns': 
['http://www.w3.org/2005/Atom'], + 'author': ['some awesome author', 'another one', 'no one'], 'external_identifier': 'some-external-id', 'url': 'https://hal-test.archives-ouvertes.fr/' + 'some-external-id' @@ -177,7 +177,7 @@ 'branch_name': 'master', } - self.assertEquals(data, expected_meta) + self.assertEqual(data, expected_meta) @istest def access_to_nonexisting_deposit_returns_404_response(self): diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -348,12 +348,13 @@ some-external-id https://hal-test.archives-ouvertes.fr/some-external-id + some awesome author """ self.atom_entry_data1 = b""" - some awesome author - + another one + no one """ self.atom_entry_data2 = b""" diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/test_utils.py @@ -0,0 +1,138 @@ +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import unittest + +from nose.tools import istest + +from swh.deposit import utils + + +class UtilsTestCase(unittest.TestCase): + """Utils library + + """ + @istest + def merge(self): + """Calling utils.merge on dicts should merge without losing information + + """ + d0 = { + 'author': 'someone', + 'license': [['gpl2']], + 'a': 1 + } + + d1 = { + 'author': ['author0', {'name': 'author1'}], + 'license': [['gpl3']], + 'b': { + '1': '2' + } + } + + d2 = { + 'author': map(lambda x: x, ['else']), + 'license': 'mit', + 'b': { + '2': '3', + } + } + + d3 = { + 'author': (v for v in ['no one']), + } + + actual_merge = utils.merge(d0, d1, d2, d3) + + expected_merge = { + 'a': 1, + 'license': [['gpl2'], ['gpl3'], 'mit'], + 'author': [ + 'someone', 'author0', {'name': 'author1'}, 'else', 'no one'], + 'b': 
def merge(*dicts):
    """Merge an arbitrary number of dicts into a single dict, losing no
    information.

    Values present under the same key in several dicts are accumulated
    into a list (without duplicates, compared by equality); two dict
    values for the same key are merged recursively.  List, ``map`` and
    generator values are expanded into their items.  Input dicts are
    never mutated.

    Args:
        *dicts: dict instances to merge into one

    Returns:
        A new dict merged without losing information.

    Raises:
        ValueError: if any positional argument is not a dict

    """
    def _as_list(value):
        # Normalize a value into a list of items: lists, ``map`` objects
        # and generators are expanded, anything else is wrapped.
        if isinstance(value, (list, map, GeneratorType)):
            return list(value)
        return [value]

    def _extend(values, new_value):
        # Append the normalized items of ``new_value`` to ``values``,
        # skipping items already present (``in`` uses ``==``, so
        # unhashable items such as dicts are supported).
        for item in _as_list(new_value):
            if item not in values:
                values.append(item)
        return values

    merged = {}
    for data in dicts:
        if not isinstance(data, dict):
            raise ValueError(
                'dicts is supposed to be a variable arguments of dict')

        for key, value in data.items():
            # Membership test (not truthiness) so that falsy existing
            # values (0, '', [], ...) are merged instead of being
            # silently replaced.
            if key not in merged:
                merged[key] = value
                continue
            existing = merged[key]
            if isinstance(existing, dict) and isinstance(value, dict):
                merged[key] = merge(existing, value)
            else:
                # _as_list always returns a fresh list, so extending it
                # cannot mutate a list owned by one of the input dicts;
                # it also expands a generator/map stored on a previous
                # pass instead of crashing on a missing .append.
                merged[key] = _extend(_as_list(existing), value)
    return merged