Page MenuHomeSoftware Heritage

D5041.id18070.diff
No OneTemporary

D5041.id18070.diff

diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py
--- a/swh/loader/git/loader.py
+++ b/swh/loader/git/loader.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 The Software Heritage developers
+# Copyright (C) 2016-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -14,11 +14,13 @@
from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Type
import dulwich.client
+from dulwich.errors import GitProtocolError, ObjectFormatException
from dulwich.object_store import ObjectStoreGraphWalker
from dulwich.pack import PackData, PackInflater
from swh.core.config import merge_configs
from swh.loader.core.loader import DVCSLoader
+from swh.loader.exception import NotFound
from swh.model import hashutil
from swh.model.model import (
BaseContent,
@@ -235,9 +237,18 @@
sys.stderr.buffer.write(msg)
sys.stderr.flush()
- fetch_info = self.fetch_pack_from_origin(
- self.origin.url, self.base_snapshot, do_progress
- )
+ try:
+ fetch_info = self.fetch_pack_from_origin(
+ self.origin.url, self.base_snapshot, do_progress
+ )
+ except GitProtocolError as e:
+ # Failure to communicate during the initialization, in various forms
+ # (e.g. repository not found, 401, ...)
+ raise NotFound(e)
+ except (IOError, ObjectFormatException) as e:
+ # Problem after the initialization, during the fetch pack communication
+ # (e.g. pack file too big, ...)
+ raise ValueError(e)
self.pack_buffer = fetch_info.pack_buffer
self.pack_size = fetch_info.pack_size
diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py
--- a/swh/loader/git/tests/test_loader.py
+++ b/swh/loader/git/tests/test_loader.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2020 The Software Heritage developers
+# Copyright (C) 2018-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -6,15 +6,71 @@
import os
from unittest import TestCase
+from dulwich.errors import GitProtocolError, ObjectFormatException
import dulwich.repo
import pytest
from swh.loader.git.loader import GitLoader
from swh.loader.git.tests.test_from_disk import FullGitLoaderTests
-from swh.loader.tests import prepare_repository_from_archive
+from swh.loader.tests import assert_last_visit_matches, prepare_repository_from_archive
-class GitLoaderTest(TestCase, FullGitLoaderTests):
+class CommonGitLoaderNotFound:
+ @pytest.fixture(autouse=True)
+ def __inject_fixtures(self, mocker):
+ """Inject the pytest ``mocker`` fixture into unittest.TestCase classes
+
+ """
+ self.mocker = mocker
+
+ def test_load_visit_not_found(self):
+ """Ingesting an unknown url results in a visit with not_found status
+
+ """
+ # simulate an initial communication error (e.g. no repository found, ...)
+ mock = self.mocker.patch(
+ "swh.loader.git.loader.GitLoader.fetch_pack_from_origin"
+ )
+ mock.side_effect = GitProtocolError
+
+ res = self.loader.load()
+ assert res == {"status": "uneventful"}
+
+ assert_last_visit_matches(
+ self.loader.storage,
+ self.repo_url,
+ status="not_found",
+ type="git",
+ snapshot=None,
+ )
+
+ def test_load_visit_failure(self):
+ """Failing during the fetch pack step results in a failed visit
+
+ """
+ for failure_exception in [IOError, ObjectFormatException]:
+ with self.subTest(failure_exception=failure_exception):
+ # simulate a communication error during the fetch, after the initial
+ # connection (e.g. IOError, ObjectFormatException, ...)
+ mock = self.mocker.patch(
+ "swh.loader.git.loader.GitLoader.fetch_pack_from_origin"
+ )
+
+ mock.side_effect = failure_exception
+
+ res = self.loader.load()
+ assert res == {"status": "failed"}
+
+ assert_last_visit_matches(
+ self.loader.storage,
+ self.repo_url,
+ status="failed",
+ type="git",
+ snapshot=None,
+ )
+
+
+class GitLoaderTest(TestCase, FullGitLoaderTests, CommonGitLoaderNotFound):
"""Prepare a git directory repository to be loaded through a GitLoader.
This tests all git loader scenario.
@@ -34,7 +90,7 @@
self.repo = dulwich.repo.Repo(self.destination_path)
-class GitLoader2Test(TestCase, FullGitLoaderTests):
+class GitLoader2Test(TestCase, FullGitLoaderTests, CommonGitLoaderNotFound):
"""Mostly the same loading scenario but with a base-url different than the repo-url.
To walk slightly different paths, the end result should stay the same.

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:38 PM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227943

Event Timeline