Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9345975
D5041.id18070.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D5041.id18070.diff
View Options
diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py
--- a/swh/loader/git/loader.py
+++ b/swh/loader/git/loader.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020 The Software Heritage developers
+# Copyright (C) 2016-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -14,11 +14,13 @@
from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Type
import dulwich.client
+from dulwich.errors import GitProtocolError, ObjectFormatException
from dulwich.object_store import ObjectStoreGraphWalker
from dulwich.pack import PackData, PackInflater
from swh.core.config import merge_configs
from swh.loader.core.loader import DVCSLoader
+from swh.loader.exception import NotFound
from swh.model import hashutil
from swh.model.model import (
BaseContent,
@@ -235,9 +237,18 @@
sys.stderr.buffer.write(msg)
sys.stderr.flush()
- fetch_info = self.fetch_pack_from_origin(
- self.origin.url, self.base_snapshot, do_progress
- )
+ try:
+ fetch_info = self.fetch_pack_from_origin(
+ self.origin.url, self.base_snapshot, do_progress
+ )
+ except GitProtocolError as e:
+ # Failure to communicate during the initialization in various forms (e.g.
+ # repository not found, 401, ...)
+ raise NotFound(e)
+ except (IOError, ObjectFormatException) as e:
+ # Problem after the initialization, during the fetch pack communication (e.g.
+ # pack file too big, ...)
+ raise ValueError(e)
self.pack_buffer = fetch_info.pack_buffer
self.pack_size = fetch_info.pack_size
diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py
--- a/swh/loader/git/tests/test_loader.py
+++ b/swh/loader/git/tests/test_loader.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018-2020 The Software Heritage developers
+# Copyright (C) 2018-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -6,15 +6,71 @@
import os
from unittest import TestCase
+from dulwich.errors import GitProtocolError, ObjectFormatException
import dulwich.repo
import pytest
from swh.loader.git.loader import GitLoader
from swh.loader.git.tests.test_from_disk import FullGitLoaderTests
-from swh.loader.tests import prepare_repository_from_archive
+from swh.loader.tests import assert_last_visit_matches, prepare_repository_from_archive
-class GitLoaderTest(TestCase, FullGitLoaderTests):
+class CommonGitLoaderNotFound:
+ @pytest.fixture(autouse=True)
+ def __inject_fixtures(self, mocker):
+ """Inject required fixtures in unittest.TestCase class
+
+ """
+ self.mocker = mocker
+
+ def test_load_visit_not_found(self):
+ """Ingesting an unknown URL results in a visit with not_found status
+
+ """
+ # simulate an initial communication error (e.g. no repository found, ...)
+ mock = self.mocker.patch(
+ "swh.loader.git.loader.GitLoader.fetch_pack_from_origin"
+ )
+ mock.side_effect = GitProtocolError
+
+ res = self.loader.load()
+ assert res == {"status": "uneventful"}
+
+ assert_last_visit_matches(
+ self.loader.storage,
+ self.repo_url,
+ status="not_found",
+ type="git",
+ snapshot=None,
+ )
+
+ def test_load_visit_failure(self):
+ """Failing during the fetch pack step results in a failed visit
+
+ """
+ for failure_exception in [IOError, ObjectFormatException]:
+ with self.subTest(failure_exception=failure_exception):
+ # simulate a server error during the fetch, after the initial connection
+ # succeeded (e.g. IOError, ObjectFormatException, ...)
+ mock = self.mocker.patch(
+ "swh.loader.git.loader.GitLoader.fetch_pack_from_origin"
+ )
+
+ mock.side_effect = failure_exception
+
+ res = self.loader.load()
+ assert res == {"status": "failed"}
+
+ assert_last_visit_matches(
+ self.loader.storage,
+ self.repo_url,
+ status="failed",
+ type="git",
+ snapshot=None,
+ )
+
+
+class GitLoaderTest(TestCase, FullGitLoaderTests, CommonGitLoaderNotFound):
"""Prepare a git directory repository to be loaded through a GitLoader.
This tests all git loader scenario.
@@ -34,7 +90,7 @@
self.repo = dulwich.repo.Repo(self.destination_path)
-class GitLoader2Test(TestCase, FullGitLoaderTests):
+class GitLoader2Test(TestCase, FullGitLoaderTests, CommonGitLoaderNotFound):
"""Mostly the same loading scenario but with a base-url different than the repo-url.
To walk slightly different paths, the end result should stay the same.
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 3:38 PM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227943
Attached To
D5041: loader.git: Mark visit status as not_found when relevant
Event Timeline
Log In to Comment