diff --git a/swh/storage/vault/cache.py b/swh/storage/vault/cache.py
--- a/swh/storage/vault/cache.py
+++ b/swh/storage/vault/cache.py
@@ -31,6 +31,20 @@
         storage = self._get_storage(obj_type)
         return storage.get(hashutil.hash_to_bytes(obj_id))
 
+    def add_stream(self, obj_type, obj_id, content_iter):
+        """Store the chunks yielded by content_iter as the bundle obj_id
+        in the storage associated with obj_type.
+        """
+        # NOTE(review): obj_id is forwarded unconverted while get_stream()
+        # goes through hash_to_bytes() -- confirm callers always pass bytes.
+        storage = self._get_storage(obj_type)
+        return storage.add_stream(content_iter, obj_id)
+
+    def get_stream(self, obj_type, obj_id):
+        """Delegate to get_stream() of the obj_type storage for obj_id."""
+        storage = self._get_storage(obj_type)
+        return storage.get_stream(hashutil.hash_to_bytes(obj_id))
+
     def is_cached(self, obj_type, obj_id):
         storage = self._get_storage(obj_type)
         return hashutil.hash_to_bytes(obj_id) in storage
diff --git a/swh/storage/vault/cookers/base.py b/swh/storage/vault/cookers/base.py
--- a/swh/storage/vault/cookers/base.py
+++ b/swh/storage/vault/cookers/base.py
@@ -65,7 +65,7 @@
 
     @abc.abstractmethod
     def prepare_bundle(self):
-        """Implementation of the cooker. Returns the bundle bytes.
+        """Implementation of the cooker. Yields chunks of the bundle bytes.
 
         Override this with the cooker implementation.
         """
@@ -74,20 +74,20 @@
     def cook(self):
         """Cook the requested object into a bundle
         """
-        bundle_content = self.prepare_bundle()
+        content_iter = self.prepare_bundle()
 
         # Cache the bundle
-        self.update_cache(bundle_content)
+        self.update_cache(content_iter)
 
         # Make a notification that the bundle have been cooked
         # NOT YET IMPLEMENTED see TODO in function.
         self.notify_bundle_ready(
             notif_data='Bundle %s ready' % hashutil.hash_to_hex(self.obj_id))
 
-    def update_cache(self, bundle_content):
-        """Update the cache with id and bundle_content.
+    def update_cache(self, content_iter):
+        """Update the cache with id and the chunks from content_iter.
         """
-        self.cache.add(self.CACHE_TYPE_KEY, self.obj_id, bundle_content)
+        self.cache.add_stream(self.CACHE_TYPE_KEY, self.obj_id, content_iter)
 
     def notify_bundle_ready(self, notif_data):
         # TODO plug this method with the notification method once
diff --git a/swh/storage/vault/cookers/directory.py b/swh/storage/vault/cookers/directory.py
--- a/swh/storage/vault/cookers/directory.py
+++ b/swh/storage/vault/cookers/directory.py
@@ -21,4 +21,4 @@
 
         """
         directory_builder = DirectoryBuilder(self.storage)
-        return directory_builder.get_directory_bytes(self.obj_id)
+        yield directory_builder.get_directory_bytes(self.obj_id)
diff --git a/swh/storage/vault/cookers/revision_flat.py b/swh/storage/vault/cookers/revision_flat.py
--- a/swh/storage/vault/cookers/revision_flat.py
+++ b/swh/storage/vault/cookers/revision_flat.py
@@ -30,4 +30,5 @@
                 revdir.mkdir()
                 directory_builder.build_directory(revision['directory'],
                                                   str(revdir).encode())
-            return get_tar_bytes(root_tmp, hashutil.hash_to_hex(self.obj_id))
+            # FIXME: stream the bytes! this tarball can be HUUUUUGE
+            yield get_tar_bytes(root_tmp, hashutil.hash_to_hex(self.obj_id))
diff --git a/swh/storage/vault/cookers/revision_git.py b/swh/storage/vault/cookers/revision_git.py
--- a/swh/storage/vault/cookers/revision_git.py
+++ b/swh/storage/vault/cookers/revision_git.py
@@ -17,9 +17,15 @@
     CACHE_TYPE_KEY = 'revision_git'
 
     def prepare_bundle(self):
-        commands = self.fastexport(self.storage.revision_log([self.obj_id]))
-        bundle_content = b'\n'.join(bytes(command) for command in commands)
-        return bundle_content
+        log = self.storage.revision_log([self.obj_id])
+        commands = self.fastexport(log)
+
+        # The commands used to be joined with b'\n': keep emitting that
+        # separator so the concatenated chunks are byte-identical.
+        for index, command in enumerate(commands):
+            if index:
+                yield b'\n'
+            yield bytes(command)
 
     def fastexport(self, log):
         """Generate all the git fast-import commands from a given log.