diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py --- a/swh/loader/core/loader.py +++ b/swh/loader/core/loader.py @@ -10,7 +10,7 @@ from pathlib import Path import tempfile import time -from typing import Any, ContextManager, Dict, Iterable, List, Optional, Union +from typing import Any, ContextManager, Dict, List, Optional, Union from urllib.parse import urlparse from requests.exceptions import HTTPError @@ -25,15 +25,12 @@ from swh.loader.package.utils import download from swh.model import from_disk from swh.model.model import ( - BaseContent, Content, Directory, Origin, OriginVisit, OriginVisitStatus, RawExtrinsicMetadata, - Release, - Revision, Sha1Git, SkippedContent, Snapshot, @@ -570,89 +567,6 @@ self.statsd.increment(f"{name}_count", tags=tags) -class DVCSLoader(BaseLoader): - """This base class is a pattern for dvcs loaders (e.g. git, mercurial). - - Those loaders are able to load all the data in one go. For example, the - loader defined in swh-loader-git :class:`BulkUpdater`. - - For other loaders (stateful one, (e.g :class:`SWHSvnLoader`), - inherit directly from :class:`BaseLoader`. - - """ - - def cleanup(self) -> None: - """Clean up an eventual state installed for computations.""" - pass - - def has_contents(self) -> bool: - """Checks whether we need to load contents""" - return True - - def get_contents(self) -> Iterable[BaseContent]: - """Get the contents that need to be loaded""" - raise NotImplementedError - - def has_directories(self) -> bool: - """Checks whether we need to load directories""" - return True - - def get_directories(self) -> Iterable[Directory]: - """Get the directories that need to be loaded""" - raise NotImplementedError - - def has_revisions(self) -> bool: - """Checks whether we need to load revisions""" - return True - - def get_revisions(self) -> Iterable[Revision]: - """Get the revisions that need to be loaded""" - raise NotImplementedError - - def has_releases(self) -> bool: - """Checks whether we need to load releases""" - return True - - def get_releases(self) -> Iterable[Release]: - """Get the releases that need to be loaded""" - raise NotImplementedError - - def get_snapshot(self) -> Snapshot: - """Get the snapshot that needs to be loaded""" - raise NotImplementedError - - def eventful(self) -> bool: - """Whether the load was eventful""" - raise NotImplementedError - - def store_data(self) -> None: - assert self.origin - if self.save_data_path: - self.save_data() - - if self.has_contents(): - for obj in self.get_contents(): - if isinstance(obj, Content): - self.storage.content_add([obj]) - elif isinstance(obj, SkippedContent): - self.storage.skipped_content_add([obj]) - else: - raise TypeError(f"Unexpected content type: {obj}") - if self.has_directories(): - for directory in self.get_directories(): - self.storage.directory_add([directory]) - if self.has_revisions(): - for revision in self.get_revisions(): - self.storage.revision_add([revision]) - if self.has_releases(): - for release in self.get_releases(): - self.storage.release_add([release]) - snapshot = self.get_snapshot() - self.storage.snapshot_add([snapshot]) - self.flush() - self.loaded_snapshot_id = snapshot.id - - class NodeLoader(BaseLoader): """Common class for :class:`ContentLoader` and :class:`Directoryloader`. diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py --- a/swh/loader/core/tests/test_loader.py +++ b/swh/loader/core/tests/test_loader.py @@ -18,19 +18,16 @@ BaseLoader, ContentLoader, DirectoryLoader, - DVCSLoader, ) from swh.loader.core.metadata_fetchers import MetadataFetcherProtocol from swh.loader.exception import NotFound, UnsupportedChecksumComputation from swh.loader.tests import assert_last_visit_matches -from swh.model.hashutil import hash_to_bytes from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, Origin, RawExtrinsicMetadata, - Snapshot, ) import swh.storage.exc @@ -77,28 +74,6 @@ return None -class DummyDVCSLoader(DummyLoader, DVCSLoader): - """DVCS Loader that does nothing in regards to DAG objects.""" - - def get_contents(self): - return [] - - def get_directories(self): - return [] - - def get_revisions(self): - return [] - - def get_releases(self): - return [] - - def get_snapshot(self): - return Snapshot(branches={}) - - def eventful(self): - return False - - class DummyBaseLoader(DummyLoader, BaseLoader): """Buffered loader will send new data when threshold is reached""" @@ -270,27 +245,11 @@ assert post_load.call_args_list == [mocker.call(), mocker.call(success=False)] -def test_dvcs_loader(swh_storage): - loader = DummyDVCSLoader(swh_storage) - result = loader.load() - assert result == {"status": "eventful"} - - -def test_dvcs_loader_with_config(swh_storage): - loader = DummyDVCSLoader(swh_storage, "another-logger") - result = loader.load() - assert result == {"status": "eventful"} - - def test_loader_logger_default_name(swh_storage): loader = DummyBaseLoader(swh_storage) assert isinstance(loader.log, logging.Logger) assert loader.log.name == "swh.loader.core.tests.test_loader.DummyBaseLoader" - loader = DummyDVCSLoader(swh_storage) - assert isinstance(loader.log, logging.Logger) - assert loader.log.name == "swh.loader.core.tests.test_loader.DummyDVCSLoader" - def test_loader_logger_with_name(swh_storage): loader = DummyBaseLoader(swh_storage, "some.logger.name") @@ -316,7 +275,7 @@ ): """Check whether a failed load properly logged its exception, and that the snapshot didn't get referenced in storage""" - assert isinstance(loader, (DVCSLoader, ContentLoader, DirectoryLoader)) + assert isinstance(loader, (ContentLoader, DirectoryLoader)) for record in caplog.records: if record.levelname != "ERROR": continue @@ -326,10 +285,6 @@ assert isinstance(exc, exc_class) assert exc_text in exc.args[0] - if isinstance(loader, DVCSLoader): - # Check that the get_snapshot operation would have succeeded - assert loader.get_snapshot() is not None - # And confirm that the visit doesn't reference a snapshot visit = assert_last_visit_matches(loader.storage, origin.url, status) if status != "partial": @@ -410,103 +365,13 @@ assert loader.statsd.constant_tags == {"visit_type": "my-visit-type"} -class DummyDVCSLoaderExc(DummyDVCSLoader): - """A loader which raises an exception when loading some contents""" - - def get_contents(self): - raise RuntimeError("Failed to get contents!") - - -def test_dvcs_loader_exc_partial_visit(swh_storage, caplog): - logger_name = "dvcsloaderexc" - caplog.set_level(logging.ERROR, logger=logger_name) - - loader = DummyDVCSLoaderExc(swh_storage, logging_class=logger_name) - # fake the loading ending up in a snapshot - loader.loaded_snapshot_id = hash_to_bytes( - "9e4dd2b40d1b46b70917c0949aa2195c823a648e" - ) - result = loader.load() - - # loading failed - assert result == {"status": "failed"} - - # still resulted in a partial visit with a snapshot (somehow) - _check_load_failure( - caplog, - loader, - RuntimeError, - "Failed to get contents!", - ) - - -class BrokenStorageProxy: - def __init__(self, storage): - self.storage = storage - - def __getattr__(self, attr): - return getattr(self.storage, attr) - - def snapshot_add(self, snapshots): - raise RuntimeError("Failed to add snapshot!") - - -class DummyDVCSLoaderStorageExc(DummyDVCSLoader): - """A loader which raises an exception when loading some contents""" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.storage = BrokenStorageProxy(self.storage) - - -def test_dvcs_loader_storage_exc_failed_visit(swh_storage, caplog): - logger_name = "dvcsloaderexc" - caplog.set_level(logging.ERROR, logger=logger_name) - - loader = DummyDVCSLoaderStorageExc(swh_storage, logging_class=logger_name) - result = loader.load() - - assert result == {"status": "failed"} - - _check_load_failure( - caplog, loader, RuntimeError, "Failed to add snapshot!", status="failed" - ) - - -class DummyDVCSLoaderNotFound(DummyDVCSLoader, BaseLoader): - """A loader which raises a not_found exception during the prepare method call""" - - def prepare(*args, **kwargs): - raise NotFound("Unknown origin!") - - def load_status(self): - return { - "status": "uneventful", - } - - -def test_loader_not_found(swh_storage, caplog): - loader = DummyDVCSLoaderNotFound(swh_storage) - result = loader.load() - - assert result == {"status": "uneventful"} - - _check_load_failure(caplog, loader, NotFound, "Unknown origin!", status="not_found") - - class DummyLoaderWithError(DummyBaseLoader): def prepare(self, *args, **kwargs): raise Exception("error") -class DummyDVCSLoaderWithError(DummyDVCSLoader, BaseLoader): - def prepare(self, *args, **kwargs): - raise Exception("error") - - -@pytest.mark.parametrize("loader_cls", [DummyLoaderWithError, DummyDVCSLoaderWithError]) -def test_loader_sentry_tags_on_error(swh_storage, sentry_events, loader_cls): - loader = loader_cls(swh_storage) +def test_loader_sentry_tags_on_error(swh_storage, sentry_events): + loader = DummyLoaderWithError(swh_storage) loader.load() sentry_tags = sentry_events[0]["tags"] assert sentry_tags.get(SENTRY_ORIGIN_URL_TAG_NAME) == ORIGIN.url