diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py
--- a/swh/deposit/api/private/__init__.py
+++ b/swh/deposit/api/private/__init__.py
@@ -3,6 +3,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from swh.deposit import utils
from ...config import METADATA_TYPE
from ...models import DepositRequest, Deposit
@@ -45,7 +46,6 @@
metadata dict from the deposit.
"""
- metadata = {}
- for dr in self._deposit_requests(deposit, request_type=METADATA_TYPE):
- metadata.update(dr.metadata)
- return metadata
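+    # merge the metadata dict of every metadata-type request without losing values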
+ metadata = (m.metadata for m in self._deposit_requests(
+ deposit, request_type=METADATA_TYPE))
+ return utils.merge(*metadata)
diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -82,18 +82,6 @@
if not os.path.exists(self.extraction_dir):
os.makedirs(self.extraction_dir)
- def retrieve_archives(self, deposit_id):
- """Given a deposit identifier, returns its associated archives' path.
-
- Yields:
- path to deposited archives
-
- """
- deposit_requests = self._deposit_requests(
- deposit_id, request_type=ARCHIVE_TYPE)
- for deposit_request in deposit_requests:
- yield deposit_request.archive.path
-
def process_get(self, req, collection_name, deposit_id):
"""Build a unique tarball from the multiple received and stream that
content to the client.
@@ -107,9 +95,9 @@
Tuple status, stream of content, content-type
"""
- archive_paths = list(self.retrieve_archives(deposit_id))
- with aggregate_tarballs(self.extraction_dir,
- archive_paths) as path:
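+        # gather the archive path of every archive-type request for this deposit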
+ archive_paths = [r.archive.path for r in self._deposit_requests(
+ deposit_id, request_type=ARCHIVE_TYPE)]
+ with aggregate_tarballs(self.extraction_dir, archive_paths) as path:
return FileResponse(open(path, 'rb'),
status=status.HTTP_200_OK,
content_type='application/octet-stream')
diff --git a/swh/deposit/tests/api/test_deposit_read_metadata.py b/swh/deposit/tests/api/test_deposit_read_metadata.py
--- a/swh/deposit/tests/api/test_deposit_read_metadata.py
+++ b/swh/deposit/tests/api/test_deposit_read_metadata.py
@@ -51,8 +51,8 @@
},
'origin_metadata': {
'metadata': {
- '@xmlns': 'http://www.w3.org/2005/Atom',
- 'author': 'some awesome author',
+ '@xmlns': ['http://www.w3.org/2005/Atom'],
+ 'author': ['some awesome author', 'another one', 'no one'],
'external_identifier': 'some-external-id',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
@@ -79,8 +79,8 @@
'committer': SWH_PERSON,
'date': None,
'metadata': {
- '@xmlns': 'http://www.w3.org/2005/Atom',
- 'author': 'some awesome author',
+ '@xmlns': ['http://www.w3.org/2005/Atom'],
+ 'author': ['some awesome author', 'another one', 'no one'],
'external_identifier': 'some-external-id',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
@@ -137,8 +137,8 @@
},
'origin_metadata': {
'metadata': {
- '@xmlns': 'http://www.w3.org/2005/Atom',
- 'author': 'some awesome author',
+ '@xmlns': ['http://www.w3.org/2005/Atom'],
+ 'author': ['some awesome author', 'another one', 'no one'],
'external_identifier': 'some-external-id',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
@@ -166,8 +166,8 @@
'type': 'tar',
'message': 'hal: Deposit %s in collection hal' % deposit_id,
'metadata': {
- '@xmlns': 'http://www.w3.org/2005/Atom',
- 'author': 'some awesome author',
+ '@xmlns': ['http://www.w3.org/2005/Atom'],
+ 'author': ['some awesome author', 'another one', 'no one'],
'external_identifier': 'some-external-id',
'url': 'https://hal-test.archives-ouvertes.fr/' +
'some-external-id'
@@ -177,7 +177,7 @@
'branch_name': 'master',
}
- self.assertEquals(data, expected_meta)
+ self.assertEqual(data, expected_meta)
@istest
def access_to_nonexisting_deposit_returns_404_response(self):
diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py
--- a/swh/deposit/tests/common.py
+++ b/swh/deposit/tests/common.py
@@ -348,12 +348,13 @@
             <external_identifier>some-external-id</external_identifier>
             <url>https://hal-test.archives-ouvertes.fr/some-external-id</url>
+            <author>some awesome author</author>
"""
self.atom_entry_data1 = b"""
-            <author>some awesome author</author>
-
+            <author>another one</author>
+            <author>no one</author>
"""
self.atom_entry_data2 = b"""
diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/test_utils.py
@@ -0,0 +1,115 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+
+from nose.tools import istest
+
+from swh.deposit import utils
+
+
+class UtilsTestCase(unittest.TestCase):
+ """Utils library
+
+ """
+ @istest
+ def merge(self):
+ """Calling utils.merge on dicts should merge without losing information
+
+ """
+ d0 = {
+ 'author': 'someone',
+ 'a': 1
+ }
+
+ d1 = {
+ 'author': ['author0', {'name': 'author1'}],
+ 'b': {
+ '1': '2'
+ }
+ }
+
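+        # lazy iterables (map objects, generators) are accepted as values too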
+ d2 = {
+ 'author': map(lambda x: x, ['else']),
+ 'b': {
+ '2': '3',
+ }
+ }
+
+ d3 = {
+ 'author': (v for v in ['no one']),
+ }
+
+ actual_merge = utils.merge(d0, d1, d2, d3)
+
+ expected_merge = {
+ 'a': 1,
+ 'author': [
+ 'someone', 'author0', {'name': 'author1'}, 'else', 'no one'],
+ 'b': {
+ '1': '2',
+ '2': '3',
+ }
+ }
+        self.assertEqual(actual_merge, expected_merge)
+
+ @istest
+    def merge_2(self):
+        """Merging dicts whose values collide on the same key should
+        accumulate those values without losing any of them
+
+        """
+ d0 = {
+ 'license': 'gpl2',
+ 'runtime': {
+ 'os': 'unix derivative'
+ }
+ }
+
+ d1 = {
+ 'license': 'gpl3',
+ 'runtime': 'GNU/Linux'
+ }
+
+ expected = {
+ 'license': ['gpl2', 'gpl3'],
+ 'runtime': [
+ {
+ 'os': 'unix derivative'
+ },
+ 'GNU/Linux'
+ ],
+ }
+
+ actual = utils.merge(d0, d1)
+ self.assertEqual(actual, expected)
+
+ @istest
+ def merge_one_dict(self):
+ """Merge one dict should result in the same dict value
+
+ """
+ input_and_expected = {'anything': 'really'}
+ actual = utils.merge(input_and_expected)
+ self.assertEqual(actual, input_and_expected)
+
+ @istest
+ def merge_raise(self):
+ """Calling utils.merge with any no dict argument should raise
+
+ """
+ d0 = {
+ 'author': 'someone',
+ 'a': 1
+ }
+
+ d1 = ['not a dict']
+
+ with self.assertRaises(ValueError):
+ utils.merge(d0, d1)
+
+ with self.assertRaises(ValueError):
+ utils.merge(d1, d0)
+
+ with self.assertRaises(ValueError):
+ utils.merge(d1)
+
+        self.assertEqual(utils.merge(d0), d0)
diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py
new file mode 100644
--- /dev/null
+++ b/swh/deposit/utils.py
@@ -0,0 +1,55 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from types import GeneratorType
+
+
+def merge(*dicts):
+ """Given an iterator of dicts, merge them losing no information.
+
+ Args:
+ *dicts: arguments are all supposed to be dict to merge into one
+
+ Returns:
+ dict merged without losing information
+
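+    Example (illustrative; when a key collides, scalar values accumulate
+    into a list):
+
+        merge({'a': 1}, {'a': 2, 'b': 3})
+        # -> {'a': [1, 2], 'b': 3}
+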
+ """
+ def _extend(existing_val, value):
+ """Given an existing value and a value (as potential lists), merge
+ them together without repetition.
+
+ """
+ if isinstance(value, (list, map, GeneratorType)):
+ vals = value
+ else:
+ vals = [value]
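+        # append each new value, skipping any already present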
+ for v in vals:
+ if v in existing_val:
+ continue
+ existing_val.append(v)
+ return existing_val
+
+ d = {}
+ for data in dicts:
+ if not isinstance(data, dict):
+ raise ValueError(
+                'every argument of merge must be a dict')
+
+        for key, value in data.items():
+            existing_val = d.get(key)
+            if existing_val is None:
+                # first occurrence of this key: keep the value as-is
+                d[key] = value
+                continue
+            if isinstance(existing_val, (list, map, GeneratorType)):
+                # already a collection: extend it with the new value(s)
+                new_val = _extend(existing_val, value)
+            elif isinstance(existing_val, dict):
+                if isinstance(value, dict):
+                    # both values are dicts: merge them recursively
+                    new_val = merge(existing_val, value)
+                else:
+                    new_val = _extend([existing_val], value)
+            else:
+                # scalar collision: accumulate both values in a list
+                new_val = _extend([existing_val], value)
+            d[key] = new_val
+ return d