Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/bzr/loader.py
# Copyright (C) 2021-2022 The Software Heritage developers | # Copyright (C) 2021-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
"""This document contains a SWH loader for ingesting repository data | """This document contains a SWH loader for ingesting repository data | ||||
from Bazaar or Breezy. | from Bazaar or Breezy. | ||||
""" | """ | ||||
from datetime import datetime | from datetime import datetime | ||||
from functools import lru_cache, partial | from functools import lru_cache, partial | ||||
import os | import os | ||||
from tempfile import mkdtemp | from tempfile import mkdtemp | ||||
from typing import Dict, Iterator, List, NewType, Optional, Set, TypeVar, Union | from typing import Dict, Iterator, List, NewType, Optional, Set, TypeVar, Union | ||||
from breezy import errors as bzr_errors | from breezy import errors as bzr_errors | ||||
from breezy import repository, tsort | from breezy import repository, tsort | ||||
from breezy.builtins import cmd_clone | from breezy.builtins import cmd_branch | ||||
from breezy.bzr import bzrdir | from breezy.bzr import bzrdir | ||||
from breezy.bzr.branch import Branch as BzrBranch | from breezy.bzr.branch import Branch as BzrBranch | ||||
from breezy.bzr.inventory import Inventory, InventoryEntry | from breezy.bzr.inventory import Inventory, InventoryEntry | ||||
from breezy.revision import NULL_REVISION | from breezy.revision import NULL_REVISION | ||||
from breezy.revision import Revision as BzrRevision | from breezy.revision import Revision as BzrRevision | ||||
from swh.loader.core.loader import BaseLoader | from swh.loader.core.loader import BaseLoader | ||||
from swh.loader.core.utils import clean_dangling_folders, clone_with_timeout | from swh.loader.core.utils import clean_dangling_folders, clone_with_timeout | ||||
▲ Show 20 Lines • Show All 232 Lines • ▼ Show 20 Lines | def fetch_data(self) -> bool: | ||||
prefix=TEMPORARY_DIR_PREFIX_PATTERN, | prefix=TEMPORARY_DIR_PREFIX_PATTERN, | ||||
suffix=f"-{os.getpid()}", | suffix=f"-{os.getpid()}", | ||||
dir=self._temp_directory, | dir=self._temp_directory, | ||||
) | ) | ||||
msg = "Cloning '%s' to '%s' with timeout %s seconds" | msg = "Cloning '%s' to '%s' with timeout %s seconds" | ||||
self.log.debug( | self.log.debug( | ||||
msg, self.origin_url, self._repo_directory, self._clone_timeout | msg, self.origin_url, self._repo_directory, self._clone_timeout | ||||
) | ) | ||||
closure = partial(cmd_clone().run, self.origin_url, self._repo_directory) | closure = partial( | ||||
cmd_branch().run, | |||||
self.origin_url, | |||||
self._repo_directory, | |||||
no_tree=True, | |||||
use_existing_dir=True, | |||||
) | |||||
clone_with_timeout( | clone_with_timeout( | ||||
self.origin_url, self._repo_directory, closure, self._clone_timeout | self.origin_url, self._repo_directory, closure, self._clone_timeout | ||||
) | ) | ||||
else: # existing local repository | else: # existing local repository | ||||
# Allow to load on disk repository without cloning | # Allow to load on disk repository without cloning | ||||
# for testing purpose. | # for testing purpose. | ||||
self.log.debug("Using local directory '%s'", self.directory) | self.log.debug("Using local directory '%s'", self.directory) | ||||
self._repo_directory = self.directory | self._repo_directory = self.directory | ||||
▲ Show 20 Lines • Show All 335 Lines • Show Last 20 Lines |