Page MenuHomeSoftware Heritage

D8845.diff
No OneTemporary

D8845.diff

diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py
--- a/swh/loader/core/loader.py
+++ b/swh/loader/core/loader.py
@@ -10,7 +10,7 @@
from pathlib import Path
import tempfile
import time
-from typing import Any, ContextManager, Dict, Iterable, List, Optional, Union
+from typing import Any, ContextManager, Dict, List, Optional, Union
from urllib.parse import urlparse
from requests.exceptions import HTTPError
@@ -25,15 +25,12 @@
from swh.loader.package.utils import download
from swh.model import from_disk
from swh.model.model import (
- BaseContent,
Content,
Directory,
Origin,
OriginVisit,
OriginVisitStatus,
RawExtrinsicMetadata,
- Release,
- Revision,
Sha1Git,
SkippedContent,
Snapshot,
@@ -570,89 +567,6 @@
self.statsd.increment(f"{name}_count", tags=tags)
-class DVCSLoader(BaseLoader):
- """This base class is a pattern for dvcs loaders (e.g. git, mercurial).
-
- Those loaders are able to load all the data in one go. For example, the
- loader defined in swh-loader-git :class:`BulkUpdater`.
-
- For other loaders (stateful one, (e.g :class:`SWHSvnLoader`),
- inherit directly from :class:`BaseLoader`.
-
- """
-
- def cleanup(self) -> None:
- """Clean up an eventual state installed for computations."""
- pass
-
- def has_contents(self) -> bool:
- """Checks whether we need to load contents"""
- return True
-
- def get_contents(self) -> Iterable[BaseContent]:
- """Get the contents that need to be loaded"""
- raise NotImplementedError
-
- def has_directories(self) -> bool:
- """Checks whether we need to load directories"""
- return True
-
- def get_directories(self) -> Iterable[Directory]:
- """Get the directories that need to be loaded"""
- raise NotImplementedError
-
- def has_revisions(self) -> bool:
- """Checks whether we need to load revisions"""
- return True
-
- def get_revisions(self) -> Iterable[Revision]:
- """Get the revisions that need to be loaded"""
- raise NotImplementedError
-
- def has_releases(self) -> bool:
- """Checks whether we need to load releases"""
- return True
-
- def get_releases(self) -> Iterable[Release]:
- """Get the releases that need to be loaded"""
- raise NotImplementedError
-
- def get_snapshot(self) -> Snapshot:
- """Get the snapshot that needs to be loaded"""
- raise NotImplementedError
-
- def eventful(self) -> bool:
- """Whether the load was eventful"""
- raise NotImplementedError
-
- def store_data(self) -> None:
- assert self.origin
- if self.save_data_path:
- self.save_data()
-
- if self.has_contents():
- for obj in self.get_contents():
- if isinstance(obj, Content):
- self.storage.content_add([obj])
- elif isinstance(obj, SkippedContent):
- self.storage.skipped_content_add([obj])
- else:
- raise TypeError(f"Unexpected content type: {obj}")
- if self.has_directories():
- for directory in self.get_directories():
- self.storage.directory_add([directory])
- if self.has_revisions():
- for revision in self.get_revisions():
- self.storage.revision_add([revision])
- if self.has_releases():
- for release in self.get_releases():
- self.storage.release_add([release])
- snapshot = self.get_snapshot()
- self.storage.snapshot_add([snapshot])
- self.flush()
- self.loaded_snapshot_id = snapshot.id
-
-
class NodeLoader(BaseLoader):
"""Common class for :class:`ContentLoader` and :class:`Directoryloader`.
diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py
--- a/swh/loader/core/tests/test_loader.py
+++ b/swh/loader/core/tests/test_loader.py
@@ -18,19 +18,16 @@
BaseLoader,
ContentLoader,
DirectoryLoader,
- DVCSLoader,
)
from swh.loader.core.metadata_fetchers import MetadataFetcherProtocol
from swh.loader.exception import NotFound, UnsupportedChecksumComputation
from swh.loader.tests import assert_last_visit_matches
-from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
Origin,
RawExtrinsicMetadata,
- Snapshot,
)
import swh.storage.exc
@@ -77,28 +74,6 @@
return None
-class DummyDVCSLoader(DummyLoader, DVCSLoader):
- """DVCS Loader that does nothing in regards to DAG objects."""
-
- def get_contents(self):
- return []
-
- def get_directories(self):
- return []
-
- def get_revisions(self):
- return []
-
- def get_releases(self):
- return []
-
- def get_snapshot(self):
- return Snapshot(branches={})
-
- def eventful(self):
- return False
-
-
class DummyBaseLoader(DummyLoader, BaseLoader):
"""Buffered loader will send new data when threshold is reached"""
@@ -270,27 +245,11 @@
assert post_load.call_args_list == [mocker.call(), mocker.call(success=False)]
-def test_dvcs_loader(swh_storage):
- loader = DummyDVCSLoader(swh_storage)
- result = loader.load()
- assert result == {"status": "eventful"}
-
-
-def test_dvcs_loader_with_config(swh_storage):
- loader = DummyDVCSLoader(swh_storage, "another-logger")
- result = loader.load()
- assert result == {"status": "eventful"}
-
-
def test_loader_logger_default_name(swh_storage):
loader = DummyBaseLoader(swh_storage)
assert isinstance(loader.log, logging.Logger)
assert loader.log.name == "swh.loader.core.tests.test_loader.DummyBaseLoader"
- loader = DummyDVCSLoader(swh_storage)
- assert isinstance(loader.log, logging.Logger)
- assert loader.log.name == "swh.loader.core.tests.test_loader.DummyDVCSLoader"
-
def test_loader_logger_with_name(swh_storage):
loader = DummyBaseLoader(swh_storage, "some.logger.name")
@@ -316,7 +275,7 @@
):
"""Check whether a failed load properly logged its exception, and that the
snapshot didn't get referenced in storage"""
- assert isinstance(loader, (DVCSLoader, ContentLoader, DirectoryLoader))
+ assert isinstance(loader, (ContentLoader, DirectoryLoader))
for record in caplog.records:
if record.levelname != "ERROR":
continue
@@ -326,10 +285,6 @@
assert isinstance(exc, exc_class)
assert exc_text in exc.args[0]
- if isinstance(loader, DVCSLoader):
- # Check that the get_snapshot operation would have succeeded
- assert loader.get_snapshot() is not None
-
# And confirm that the visit doesn't reference a snapshot
visit = assert_last_visit_matches(loader.storage, origin.url, status)
if status != "partial":
@@ -410,103 +365,13 @@
assert loader.statsd.constant_tags == {"visit_type": "my-visit-type"}
-class DummyDVCSLoaderExc(DummyDVCSLoader):
- """A loader which raises an exception when loading some contents"""
-
- def get_contents(self):
- raise RuntimeError("Failed to get contents!")
-
-
-def test_dvcs_loader_exc_partial_visit(swh_storage, caplog):
- logger_name = "dvcsloaderexc"
- caplog.set_level(logging.ERROR, logger=logger_name)
-
- loader = DummyDVCSLoaderExc(swh_storage, logging_class=logger_name)
- # fake the loading ending up in a snapshot
- loader.loaded_snapshot_id = hash_to_bytes(
- "9e4dd2b40d1b46b70917c0949aa2195c823a648e"
- )
- result = loader.load()
-
- # loading failed
- assert result == {"status": "failed"}
-
- # still resulted in a partial visit with a snapshot (somehow)
- _check_load_failure(
- caplog,
- loader,
- RuntimeError,
- "Failed to get contents!",
- )
-
-
-class BrokenStorageProxy:
- def __init__(self, storage):
- self.storage = storage
-
- def __getattr__(self, attr):
- return getattr(self.storage, attr)
-
- def snapshot_add(self, snapshots):
- raise RuntimeError("Failed to add snapshot!")
-
-
-class DummyDVCSLoaderStorageExc(DummyDVCSLoader):
- """A loader which raises an exception when loading some contents"""
-
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- self.storage = BrokenStorageProxy(self.storage)
-
-
-def test_dvcs_loader_storage_exc_failed_visit(swh_storage, caplog):
- logger_name = "dvcsloaderexc"
- caplog.set_level(logging.ERROR, logger=logger_name)
-
- loader = DummyDVCSLoaderStorageExc(swh_storage, logging_class=logger_name)
- result = loader.load()
-
- assert result == {"status": "failed"}
-
- _check_load_failure(
- caplog, loader, RuntimeError, "Failed to add snapshot!", status="failed"
- )
-
-
-class DummyDVCSLoaderNotFound(DummyDVCSLoader, BaseLoader):
- """A loader which raises a not_found exception during the prepare method call"""
-
- def prepare(*args, **kwargs):
- raise NotFound("Unknown origin!")
-
- def load_status(self):
- return {
- "status": "uneventful",
- }
-
-
-def test_loader_not_found(swh_storage, caplog):
- loader = DummyDVCSLoaderNotFound(swh_storage)
- result = loader.load()
-
- assert result == {"status": "uneventful"}
-
- _check_load_failure(caplog, loader, NotFound, "Unknown origin!", status="not_found")
-
-
class DummyLoaderWithError(DummyBaseLoader):
def prepare(self, *args, **kwargs):
raise Exception("error")
-class DummyDVCSLoaderWithError(DummyDVCSLoader, BaseLoader):
- def prepare(self, *args, **kwargs):
- raise Exception("error")
-
-
-@pytest.mark.parametrize("loader_cls", [DummyLoaderWithError, DummyDVCSLoaderWithError])
-def test_loader_sentry_tags_on_error(swh_storage, sentry_events, loader_cls):
- loader = loader_cls(swh_storage)
+def test_loader_sentry_tags_on_error(swh_storage, sentry_events):
+ loader = DummyLoaderWithError(swh_storage)
loader.load()
sentry_tags = sentry_events[0]["tags"]
assert sentry_tags.get(SENTRY_ORIGIN_URL_TAG_NAME) == ORIGIN.url

File Metadata

Mime Type
text/plain
Expires
Dec 17 2024, 10:26 AM (13 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218187

Event Timeline