Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7122899
D8845.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
D8845.diff
View Options
diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py
--- a/swh/loader/core/loader.py
+++ b/swh/loader/core/loader.py
@@ -10,7 +10,7 @@
from pathlib import Path
import tempfile
import time
-from typing import Any, ContextManager, Dict, Iterable, List, Optional, Union
+from typing import Any, ContextManager, Dict, List, Optional, Union
from urllib.parse import urlparse
from requests.exceptions import HTTPError
@@ -25,15 +25,12 @@
from swh.loader.package.utils import download
from swh.model import from_disk
from swh.model.model import (
- BaseContent,
Content,
Directory,
Origin,
OriginVisit,
OriginVisitStatus,
RawExtrinsicMetadata,
- Release,
- Revision,
Sha1Git,
SkippedContent,
Snapshot,
@@ -570,89 +567,6 @@
self.statsd.increment(f"{name}_count", tags=tags)
-class DVCSLoader(BaseLoader):
- """This base class is a pattern for dvcs loaders (e.g. git, mercurial).
-
- Those loaders are able to load all the data in one go. For example, the
- loader defined in swh-loader-git :class:`BulkUpdater`.
-
- For other loaders (stateful one, (e.g :class:`SWHSvnLoader`),
- inherit directly from :class:`BaseLoader`.
-
- """
-
- def cleanup(self) -> None:
- """Clean up an eventual state installed for computations."""
- pass
-
- def has_contents(self) -> bool:
- """Checks whether we need to load contents"""
- return True
-
- def get_contents(self) -> Iterable[BaseContent]:
- """Get the contents that need to be loaded"""
- raise NotImplementedError
-
- def has_directories(self) -> bool:
- """Checks whether we need to load directories"""
- return True
-
- def get_directories(self) -> Iterable[Directory]:
- """Get the directories that need to be loaded"""
- raise NotImplementedError
-
- def has_revisions(self) -> bool:
- """Checks whether we need to load revisions"""
- return True
-
- def get_revisions(self) -> Iterable[Revision]:
- """Get the revisions that need to be loaded"""
- raise NotImplementedError
-
- def has_releases(self) -> bool:
- """Checks whether we need to load releases"""
- return True
-
- def get_releases(self) -> Iterable[Release]:
- """Get the releases that need to be loaded"""
- raise NotImplementedError
-
- def get_snapshot(self) -> Snapshot:
- """Get the snapshot that needs to be loaded"""
- raise NotImplementedError
-
- def eventful(self) -> bool:
- """Whether the load was eventful"""
- raise NotImplementedError
-
- def store_data(self) -> None:
- assert self.origin
- if self.save_data_path:
- self.save_data()
-
- if self.has_contents():
- for obj in self.get_contents():
- if isinstance(obj, Content):
- self.storage.content_add([obj])
- elif isinstance(obj, SkippedContent):
- self.storage.skipped_content_add([obj])
- else:
- raise TypeError(f"Unexpected content type: {obj}")
- if self.has_directories():
- for directory in self.get_directories():
- self.storage.directory_add([directory])
- if self.has_revisions():
- for revision in self.get_revisions():
- self.storage.revision_add([revision])
- if self.has_releases():
- for release in self.get_releases():
- self.storage.release_add([release])
- snapshot = self.get_snapshot()
- self.storage.snapshot_add([snapshot])
- self.flush()
- self.loaded_snapshot_id = snapshot.id
-
-
class NodeLoader(BaseLoader):
"""Common class for :class:`ContentLoader` and :class:`Directoryloader`.
diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py
--- a/swh/loader/core/tests/test_loader.py
+++ b/swh/loader/core/tests/test_loader.py
@@ -18,19 +18,16 @@
BaseLoader,
ContentLoader,
DirectoryLoader,
- DVCSLoader,
)
from swh.loader.core.metadata_fetchers import MetadataFetcherProtocol
from swh.loader.exception import NotFound, UnsupportedChecksumComputation
from swh.loader.tests import assert_last_visit_matches
-from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
Origin,
RawExtrinsicMetadata,
- Snapshot,
)
import swh.storage.exc
@@ -77,28 +74,6 @@
return None
-class DummyDVCSLoader(DummyLoader, DVCSLoader):
- """DVCS Loader that does nothing in regards to DAG objects."""
-
- def get_contents(self):
- return []
-
- def get_directories(self):
- return []
-
- def get_revisions(self):
- return []
-
- def get_releases(self):
- return []
-
- def get_snapshot(self):
- return Snapshot(branches={})
-
- def eventful(self):
- return False
-
-
class DummyBaseLoader(DummyLoader, BaseLoader):
"""Buffered loader will send new data when threshold is reached"""
@@ -270,27 +245,11 @@
assert post_load.call_args_list == [mocker.call(), mocker.call(success=False)]
-def test_dvcs_loader(swh_storage):
- loader = DummyDVCSLoader(swh_storage)
- result = loader.load()
- assert result == {"status": "eventful"}
-
-
-def test_dvcs_loader_with_config(swh_storage):
- loader = DummyDVCSLoader(swh_storage, "another-logger")
- result = loader.load()
- assert result == {"status": "eventful"}
-
-
def test_loader_logger_default_name(swh_storage):
loader = DummyBaseLoader(swh_storage)
assert isinstance(loader.log, logging.Logger)
assert loader.log.name == "swh.loader.core.tests.test_loader.DummyBaseLoader"
- loader = DummyDVCSLoader(swh_storage)
- assert isinstance(loader.log, logging.Logger)
- assert loader.log.name == "swh.loader.core.tests.test_loader.DummyDVCSLoader"
-
def test_loader_logger_with_name(swh_storage):
loader = DummyBaseLoader(swh_storage, "some.logger.name")
@@ -316,7 +275,7 @@
):
"""Check whether a failed load properly logged its exception, and that the
snapshot didn't get referenced in storage"""
- assert isinstance(loader, (DVCSLoader, ContentLoader, DirectoryLoader))
+ assert isinstance(loader, (ContentLoader, DirectoryLoader))
for record in caplog.records:
if record.levelname != "ERROR":
continue
@@ -326,10 +285,6 @@
assert isinstance(exc, exc_class)
assert exc_text in exc.args[0]
- if isinstance(loader, DVCSLoader):
- # Check that the get_snapshot operation would have succeeded
- assert loader.get_snapshot() is not None
-
# And confirm that the visit doesn't reference a snapshot
visit = assert_last_visit_matches(loader.storage, origin.url, status)
if status != "partial":
@@ -410,103 +365,13 @@
assert loader.statsd.constant_tags == {"visit_type": "my-visit-type"}
-class DummyDVCSLoaderExc(DummyDVCSLoader):
- """A loader which raises an exception when loading some contents"""
-
- def get_contents(self):
- raise RuntimeError("Failed to get contents!")
-
-
-def test_dvcs_loader_exc_partial_visit(swh_storage, caplog):
- logger_name = "dvcsloaderexc"
- caplog.set_level(logging.ERROR, logger=logger_name)
-
- loader = DummyDVCSLoaderExc(swh_storage, logging_class=logger_name)
- # fake the loading ending up in a snapshot
- loader.loaded_snapshot_id = hash_to_bytes(
- "9e4dd2b40d1b46b70917c0949aa2195c823a648e"
- )
- result = loader.load()
-
- # loading failed
- assert result == {"status": "failed"}
-
- # still resulted in a partial visit with a snapshot (somehow)
- _check_load_failure(
- caplog,
- loader,
- RuntimeError,
- "Failed to get contents!",
- )
-
-
-class BrokenStorageProxy:
- def __init__(self, storage):
- self.storage = storage
-
- def __getattr__(self, attr):
- return getattr(self.storage, attr)
-
- def snapshot_add(self, snapshots):
- raise RuntimeError("Failed to add snapshot!")
-
-
-class DummyDVCSLoaderStorageExc(DummyDVCSLoader):
- """A loader which raises an exception when loading some contents"""
-
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- self.storage = BrokenStorageProxy(self.storage)
-
-
-def test_dvcs_loader_storage_exc_failed_visit(swh_storage, caplog):
- logger_name = "dvcsloaderexc"
- caplog.set_level(logging.ERROR, logger=logger_name)
-
- loader = DummyDVCSLoaderStorageExc(swh_storage, logging_class=logger_name)
- result = loader.load()
-
- assert result == {"status": "failed"}
-
- _check_load_failure(
- caplog, loader, RuntimeError, "Failed to add snapshot!", status="failed"
- )
-
-
-class DummyDVCSLoaderNotFound(DummyDVCSLoader, BaseLoader):
- """A loader which raises a not_found exception during the prepare method call"""
-
- def prepare(*args, **kwargs):
- raise NotFound("Unknown origin!")
-
- def load_status(self):
- return {
- "status": "uneventful",
- }
-
-
-def test_loader_not_found(swh_storage, caplog):
- loader = DummyDVCSLoaderNotFound(swh_storage)
- result = loader.load()
-
- assert result == {"status": "uneventful"}
-
- _check_load_failure(caplog, loader, NotFound, "Unknown origin!", status="not_found")
-
-
class DummyLoaderWithError(DummyBaseLoader):
def prepare(self, *args, **kwargs):
raise Exception("error")
-class DummyDVCSLoaderWithError(DummyDVCSLoader, BaseLoader):
- def prepare(self, *args, **kwargs):
- raise Exception("error")
-
-
-@pytest.mark.parametrize("loader_cls", [DummyLoaderWithError, DummyDVCSLoaderWithError])
-def test_loader_sentry_tags_on_error(swh_storage, sentry_events, loader_cls):
- loader = loader_cls(swh_storage)
+def test_loader_sentry_tags_on_error(swh_storage, sentry_events):
+ loader = DummyLoaderWithError(swh_storage)
loader.load()
sentry_tags = sentry_events[0]["tags"]
assert sentry_tags.get(SENTRY_ORIGIN_URL_TAG_NAME) == ORIGIN.url
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 17 2024, 10:26 AM (13 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218187
Attached To
D8845: Drop unused core.loader.DVCSLoader class
Event Timeline
Log In to Comment