diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py
--- a/swh/vault/cookers/git_bare.py
+++ b/swh/vault/cookers/git_bare.py
@@ -17,6 +17,7 @@
 to avoid downloading and writing the same objects twice.
 """
 
+import asyncio
 import datetime
 import enum
 import glob
@@ -30,7 +31,7 @@
 from typing import Any, Dict, Iterable, Iterator, List, NoReturn, Optional, Set, Tuple
 import zlib
 
-from swh.core.api.classes import stream_results_optional
+from swh.core.api.classes import stream_results_optional, stream_results_optional_async
 from swh.model import git_objects
 from swh.model.hashutil import hash_to_bytehex, hash_to_hex
 from swh.model.model import (
@@ -535,15 +536,65 @@
         pass
 
     def load_directory(self, obj_id: Sha1Git) -> None:
-        # Load the directory
-        entries_it: Optional[Iterable[DirectoryEntry]] = stream_results_optional(
-            self.storage.directory_get_entries, obj_id
+        """Fetch one directory through the synchronous storage and load it.
+
+        When the storage returns ``None`` instead of an entry stream, the
+        directory is logged as missing and skipped.
+        """
+        entries_it = stream_results_optional(self.storage.directory_get_entries, obj_id)
+
+        if entries_it is None:
+            logger.error("Missing swh:1:dir:%s, ignoring.", hash_to_hex(obj_id))
+            return
+
+        self.load_directory_entries(obj_id, entries_it)
+
+    async def _load_directories_async(
+        self, async_storage_factory, obj_ids: List[Sha1Git]
+    ) -> None:
+        """Load several directories concurrently through one async storage.
+
+        ``async_storage_factory`` is called without arguments and its result is
+        entered as an async context manager; one :meth:`load_directory_async`
+        task per item of ``obj_ids`` then runs inside that context.
+        """
+        async_storage = async_storage_factory()
+        async with async_storage:
+            tasks = [
+                asyncio.ensure_future(self.load_directory_async(async_storage, obj_id))
+                for obj_id in obj_ids
+            ]
+            try:
+                await asyncio.gather(*tasks)
+            except BaseException:
+                # gather() leaves the other tasks running when one of them
+                # fails: stop them before the storage context is exited.
+                for task in tasks:
+                    task.cancel()
+                await asyncio.gather(*tasks, return_exceptions=True)
+                raise
+
+    async def load_directory_async(self, async_storage, obj_id: Sha1Git) -> None:
+        """Fetch one directory through ``async_storage`` and load it.
+
+        When the storage returns ``None`` instead of an entry stream, the
+        directory is logged as missing and skipped.
+        """
+        entries_it = await stream_results_optional_async(
+            async_storage.directory_get_entries, obj_id
         )
 
         if entries_it is None:
             logger.error("Missing swh:1:dir:%s, ignoring.", hash_to_hex(obj_id))
             return
 
+        entries = [entry async for entry in entries_it]
+
+        self.load_directory_entries(obj_id, entries)
+
+    def load_directory_entries(
+        self, obj_id: Sha1Git, entries_it: Iterable[DirectoryEntry]
+    ) -> None:
+        # Load the directory
+        entries = [entry.to_dict() for entry in entries_it]
         directory = {"id": obj_id, "entries": entries}
         git_object = git_objects.directory_git_object(directory)