diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py
--- a/swh/vault/cookers/git_bare.py
+++ b/swh/vault/cookers/git_bare.py
@@ -21,6 +21,7 @@
 import enum
 import glob
 import logging
+import multiprocessing.dummy
 import os.path
 import re
 import subprocess
@@ -51,9 +52,11 @@
 from swh.vault.cookers.base import BaseVaultCooker
 from swh.vault.to_disk import HIDDEN_MESSAGE, SKIPPED_MESSAGE
 
+THREAD_POOL_SIZE = 10
+
 RELEASE_BATCH_SIZE = 10000
 REVISION_BATCH_SIZE = 10000
-DIRECTORY_BATCH_SIZE = 10000
+DIRECTORY_BATCH_SIZE = 10000  # should be at least ~10x larger than THREAD_POOL_SIZE
 CONTENT_BATCH_SIZE = 100
 
 
@@ -524,8 +527,12 @@
         return self.write_object(release["id"], git_object)
 
     def load_directories(self, obj_ids: List[Sha1Git]) -> None:
-        for obj_id in obj_ids:
-            self.load_directory(obj_id)
+        if not obj_ids:
+            return
+
+        with multiprocessing.dummy.Pool(min(THREAD_POOL_SIZE, len(obj_ids))) as p:
+            for _ in p.imap_unordered(self.load_directory, obj_ids):
+                pass
 
     def load_directory(self, obj_id: Sha1Git) -> None:
         # Load the directory