diff --git a/swh/loader/mercurial/loader.py b/swh/loader/mercurial/loader.py --- a/swh/loader/mercurial/loader.py +++ b/swh/loader/mercurial/loader.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2020 The Software Heritage developers +# Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -34,6 +34,7 @@ from swh.core.config import merge_configs from swh.loader.core.loader import DVCSLoader from swh.loader.core.utils import clean_dangling_folders +from swh.loader.exception import NotFound from swh.model import identifiers from swh.model.hashutil import ( DEFAULT_ALGORITHMS, @@ -236,35 +237,39 @@ directory = self.directory - if not directory: # remote repository - self.working_directory = mkdtemp( - prefix=TEMPORARY_DIR_PREFIX_PATTERN, - suffix="-%s" % os.getpid(), - dir=self.temp_directory, - ) - os.makedirs(self.working_directory, exist_ok=True) - self.hgdir = self.working_directory + try: + if not directory: # remote repository + self.working_directory = mkdtemp( + prefix=TEMPORARY_DIR_PREFIX_PATTERN, + suffix="-%s" % os.getpid(), + dir=self.temp_directory, + ) + os.makedirs(self.working_directory, exist_ok=True) + self.hgdir = self.working_directory + + self.log.debug( + "Cloning %s to %s with timeout %s seconds", + self.origin_url, + self.hgdir, + self.clone_timeout, + ) - self.log.debug( - "Cloning %s to %s with timeout %s seconds", - self.origin_url, - self.hgdir, - self.clone_timeout, - ) + self.clone_with_timeout( + self.log, self.origin_url, self.hgdir, self.clone_timeout + ) - self.clone_with_timeout( - self.log, self.origin_url, self.hgdir, self.clone_timeout - ) + else: # local repository + self.working_directory = None + self.hgdir = directory - else: # local repository - self.working_directory = None - self.hgdir = directory + self.bundle_path = os.path.join(self.hgdir, self.bundle_filename) + self.log.debug("Bundling at %s" % self.bundle_path) - self.bundle_path = os.path.join(self.hgdir, self.bundle_filename) - self.log.debug("Bundling at %s" % self.bundle_path) - with hglib.open(self.hgdir) as repo: - self.heads = self.get_heads(repo) - repo.bundle(bytes(self.bundle_path, "utf-8"), all=True, type=b"none-v2") + with hglib.open(self.hgdir) as repo: + self.heads = self.get_heads(repo) + repo.bundle(bytes(self.bundle_path, "utf-8"), all=True, type=b"none-v2") + except Exception as e: + raise NotFound(e) self.cache_filename1 = os.path.join( self.hgdir, "swh-cache-1-%s" % (hex(random.randint(0, 0xFFFFFF))[2:],) diff --git a/swh/loader/mercurial/tests/test_loader.py b/swh/loader/mercurial/tests/test_loader.py --- a/swh/loader/mercurial/tests/test_loader.py +++ b/swh/loader/mercurial/tests/test_loader.py @@ -231,6 +231,24 @@ ) +def test_visit_error_with_status_not_found(swh_config, datadir, tmp_path, mocker): + """Not reaching the repo leads to a 'not_found' ingestion status""" + mock = mocker.patch("hglib.clone") + mock.side_effect = hglib.error.ResponseError() + + archive_name = "the-sandbox" + archive_path = os.path.join(datadir, f"{archive_name}.tgz") + repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) + + loader = HgBundle20Loader(repo_url) + + assert loader.load() == {"status": "uneventful"} + + assert_last_visit_matches( + loader.storage, repo_url, status="not_found", type="hg", snapshot=None, + ) + + def test_visit_repository_with_transplant_operations(swh_config, datadir, tmp_path): """Visit a mercurial repository visit transplant operations within should yield a snapshot as well.