diff --git a/swh/objstorage/multiplexer/multiplexer_objstorage.py b/swh/objstorage/multiplexer/multiplexer_objstorage.py --- a/swh/objstorage/multiplexer/multiplexer_objstorage.py +++ b/swh/objstorage/multiplexer/multiplexer_objstorage.py @@ -245,10 +245,21 @@ """ write_threads = list(self.get_write_threads()) - return sum(self.wrap_call( + results = self.wrap_call( write_threads, 'add_batch', contents, check_presence=check_presence, - )) // len(write_threads) + ) + object_add = 0 + object_add_bytes = 0 + + for result in results: + object_add += result['object:add'] + object_add_bytes += result['object:add:bytes'] + + return { + 'object:add': object_add // len(write_threads), + 'object:add:bytes': object_add_bytes // len(write_threads), + } def restore(self, content, obj_id=None): return self.wrap_call( diff --git a/swh/objstorage/multiplexer/striping_objstorage.py b/swh/objstorage/multiplexer/striping_objstorage.py --- a/swh/objstorage/multiplexer/striping_objstorage.py +++ b/swh/objstorage/multiplexer/striping_objstorage.py @@ -64,8 +64,15 @@ check_presence=check_presence, mailbox=mailbox, ) - return sum( - ObjStorageThread.collect_results( - mailbox, len(content_by_storage_index) - ) + + results = ObjStorageThread.collect_results( + mailbox, len(content_by_storage_index) ) + summary = { + 'object:add': 0, + 'object:add:bytes': 0, + } + for result in results: + summary['object:add'] += result['object:add'] + summary['object:add:bytes'] += result['object:add:bytes'] + return summary diff --git a/swh/objstorage/objstorage.py b/swh/objstorage/objstorage.py --- a/swh/objstorage/objstorage.py +++ b/swh/objstorage/objstorage.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 The Software Heritage developers +# Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -11,6 +11,8 @@ from swh.model 
import hashutil +from typing import Dict + from .exc import ObjNotFoundError @@ -149,19 +151,31 @@ """ pass - def add_batch(self, contents, check_presence=True): + def add_batch(self, contents, check_presence=True) -> Dict[str, int]: """Add a batch of new objects to the object storage. Args: - contents (dict): mapping from obj_id to object contents + contents: mapping from obj_id to object contents + Returns: - the number of objects added to the storage + a dict with the number of objects actually added ('object:add') + and their cumulated size in bytes ('object:add:bytes') + """ - ctr = 0 + object_add = 0 + object_add_bytes = 0 + for obj_id, content in contents.items(): - self.add(content, obj_id, check_presence=check_presence) - ctr += 1 - return ctr + if check_presence and obj_id in self: + continue + self.add(content, obj_id, check_presence=False) + object_add += 1 + object_add_bytes += len(content) + + return { + 'object:add': object_add, + 'object:add:bytes': object_add_bytes, + } def restore(self, content, obj_id=None, *args, **kwargs): """Restore a content that have been corrupted. 
diff --git a/swh/objstorage/tests/objstorage_testing.py b/swh/objstorage/tests/objstorage_testing.py --- a/swh/objstorage/tests/objstorage_testing.py +++ b/swh/objstorage/tests/objstorage_testing.py @@ -160,13 +160,21 @@ def test_add_batch(self): contents = {} + expected_content_add = 0 + expected_content_add_bytes = 0 for i in range(50): content = b'Test content %02d' % i content, obj_id = self.hash_content(content) contents[obj_id] = content + expected_content_add_bytes += len(content) + expected_content_add += 1 ret = self.storage.add_batch(contents) - self.assertEqual(len(contents), ret) + + self.assertEqual(ret, { + 'object:add': expected_content_add, + 'object:add:bytes': expected_content_add_bytes, + }) for obj_id in contents: self.assertIn(obj_id, self.storage) diff --git a/tox.ini b/tox.ini --- a/tox.ini +++ b/tox.ini @@ -6,6 +6,7 @@ testing deps = pytest-cov + dev: pdbpp commands = pytest --cov={envsitepackagesdir}/swh/objstorage \ {envsitepackagesdir}/swh/objstorage \