diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py
--- a/swh/storage/api/client.py
+++ b/swh/storage/api/client.py
@@ -3,7 +3,10 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from typing import Any, Dict, Iterable, Union
+
 from swh.core.api import RPCClient, RemoteException
+from swh.model.model import Content
 
 from .. import HashCollision
 from ..exc import StorageAPIError, StorageArgumentException
@@ -35,6 +38,11 @@
             else:
                 raise
 
+    def content_add(self, content: Iterable[Union[Content, Dict[str, Any]]]):
+        content = [c.with_data() if isinstance(c, Content) else c
+                   for c in content]
+        return self.post('content/add', {'content': content})
+
     def reset(self):
         return self.post('reset', {})
 
diff --git a/swh/storage/objstorage.py b/swh/storage/objstorage.py
--- a/swh/storage/objstorage.py
+++ b/swh/storage/objstorage.py
@@ -5,7 +5,7 @@
 
 from typing import Dict, Generator, Iterable
 
-from swh.model.model import Content
+from swh.model.model import Content, MissingData
 from swh.objstorage import get_objstorage
 from swh.objstorage.exc import ObjNotFoundError
 
@@ -52,9 +52,10 @@
             objstorage.
 
         """
-        contents = list(contents)
-        if any(cont.data is None for cont in contents):
-            raise StorageArgumentException('Missing data')
+        try:
+            contents = [c.with_data() for c in contents]
+        except MissingData:
+            raise StorageArgumentException('Missing data') from None
         summary = self.objstorage.add_batch({
             cont.sha1: cont.data
             for cont in contents
diff --git a/swh/storage/tests/test_api_client_dicts.py b/swh/storage/tests/test_api_client_dicts.py
--- a/swh/storage/tests/test_api_client_dicts.py
+++ b/swh/storage/tests/test_api_client_dicts.py
@@ -67,3 +67,7 @@
         swh_storage.journal_writer.journal = None
         with patch.object(server.storage.journal_writer, 'journal', None):
             super().test_content_update(swh_storage)
+
+    @pytest.mark.skip('non-applicable test')
+    def test_content_add_from_lazy_content(self):
+        pass
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -26,7 +26,7 @@
 
 from swh.model import from_disk, identifiers
 from swh.model.hashutil import hash_to_bytes
-from swh.model.model import Release, Revision
+from swh.model.model import Content, Release, Revision
 from swh.model.hypothesis_strategies import objects
 from swh.storage import HashCollision, get_storage
 from swh.storage.converters import origin_url_to_sha1 as sha1
@@ -175,6 +175,53 @@
         swh_storage.refresh_stat_counters()
         assert swh_storage.stat_counters()['content'] == 1
 
+    def test_content_add_from_lazy_content(self, swh_storage):
+        called = False
+        cont = data.cont
+
+        class LazyContent(Content):
+            def with_data(self):
+                nonlocal called
+                called = True
+                return Content.from_dict({
+                    **self.to_dict(),
+                    'data': cont['data']
+                })
+
+        lazy_content = LazyContent.from_dict({
+            **cont,
+            'data': b'nope',
+        })
+
+        insertion_start_time = datetime.datetime.now(tz=datetime.timezone.utc)
+
+        # bypass the validation proxy for now, to directly put a Content object
+        actual_result = swh_storage.storage.content_add([lazy_content])
+
+        insertion_end_time = datetime.datetime.now(tz=datetime.timezone.utc)
+
+        assert actual_result == {
+            'content:add': 1,
+            'content:add:bytes': cont['length'],
+        }
+
+        assert called
+
+        assert list(swh_storage.content_get([cont['sha1']])) == \
+            [{'sha1': cont['sha1'], 'data': cont['data']}]
+
+        expected_cont = data.cont
+        del expected_cont['data']
+        journal_objects = list(swh_storage.journal_writer.journal.objects)
+        for (obj_type, obj) in journal_objects:
+            assert insertion_start_time <= obj['ctime']
+            assert obj['ctime'] <= insertion_end_time
+            del obj['ctime']
+        assert journal_objects == [('content', expected_cont)]
+
+        swh_storage.refresh_stat_counters()
+        assert swh_storage.stat_counters()['content'] == 1
+
     def test_content_add_validation(self, swh_storage):
         cont = data.cont
 