Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7147881
D5088.id18163.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D5088.id18163.diff
View Options
diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py
--- a/swh/loader/core/loader.py
+++ b/swh/loader/core/loader.py
@@ -33,28 +33,65 @@
}
-class Loader:
- """The base class for a Software Heritage Loader.
+class BaseLoader:
+ """Mixin base class for (D)VCS loaders (e.g. svn, git, mercurial, ...).
A loader retrieves origin information (git/mercurial/svn repositories, pypi/npm/...
package artifacts), ingests the contents/directories/revisions/releases/snapshot to
the storage backend.
- For now, this just exposes 2 static methods (from_config, from_configfile) to
- centralize and ease the loader instantiation.
+ The main entry point for the loader is the :func:`load` function.
+
+ Two class methods (:func:`from_config`, :func:`from_configfile`) centralize and ease
+ the loader instantiation.
+
+ You can take a look at some example classes:
- Args:
- storage: the instance of the Storage being used to register the
- origin information
+ - :class:`SvnLoader`
+ - :class:`GitLoader`
+ - :class:`PypiLoader`
"""
def __init__(
- self, storage: StorageInterface, max_content_size: Optional[int] = None,
+ self,
+ storage: StorageInterface,
+ logging_class: Optional[str] = None,
+ save_data_path: Optional[str] = None,
+ max_content_size: Optional[int] = None,
):
+ super().__init__()
self.storage = storage
self.max_content_size = int(max_content_size) if max_content_size else None
+ if logging_class is None:
+ logging_class = "%s.%s" % (
+ self.__class__.__module__,
+ self.__class__.__name__,
+ )
+ self.log = logging.getLogger(logging_class)
+
+ _log = logging.getLogger("requests.packages.urllib3.connectionpool")
+ _log.setLevel(logging.WARN)
+
+ # possibly overridden in self.prepare method
+ self.visit_date: Optional[datetime.datetime] = None
+ self.origin: Optional[Origin] = None
+
+ if not hasattr(self, "visit_type"):
+ self.visit_type: Optional[str] = None
+
+ self.origin_metadata: Dict[str, Any] = {}
+ self.loaded_snapshot_id: Optional[Sha1Git] = None
+
+ if save_data_path:
+ path = save_data_path
+ os.stat(path)
+ if not os.access(path, os.R_OK | os.W_OK):
+ raise PermissionError("Permission denied: %r" % path)
+
+ self.save_data_path = save_data_path
+
@classmethod
def from_config(cls, storage: Dict[str, Any], **config: Any):
"""Instantiate a loader from a configuration dict.
@@ -92,79 +129,6 @@
config.update({k: v for k, v in kwargs.items() if v is not None})
return cls.from_config(**config)
-
-class BaseLoader(Loader):
- """Mixin base class for (D)VCS loaders (e.g svn, git, mercurial, ...).
-
- To define such loaders, you must:
-
- - inherit from this class
-
- - and implement following methods:
-
- - :func:`prepare`: First step executed by the loader to prepare some
- state needed by the `func`:load method.
-
- - :func:`get_origin`: Retrieve the origin that is currently being loaded.
-
- - :func:`fetch_data`: Fetch the data is actually the method to implement
- to compute data to inject in swh (through the store_data method)
-
- - :func:`store_data`: Store data fetched.
-
- - :func:`visit_status`: Explicit status of the visit ('partial' or
- 'full')
-
- - :func:`load_status`: Explicit status of the loading, for use by the
- scheduler (eventful/uneventful/temporary failure/permanent failure).
-
- - :func:`cleanup`: Last step executed by the loader.
-
- The entry point for the resulting loader is :func:`load`.
-
- You can take a look at some example classes:
-
- - :class:`SvnLoader`
-
- """
-
- def __init__(
- self,
- storage: StorageInterface,
- logging_class: Optional[str] = None,
- save_data_path: Optional[str] = None,
- max_content_size: Optional[int] = None,
- ):
- super().__init__(storage=storage, max_content_size=max_content_size)
-
- if logging_class is None:
- logging_class = "%s.%s" % (
- self.__class__.__module__,
- self.__class__.__name__,
- )
- self.log = logging.getLogger(logging_class)
-
- _log = logging.getLogger("requests.packages.urllib3.connectionpool")
- _log.setLevel(logging.WARN)
-
- # possibly overridden in self.prepare method
- self.visit_date: Optional[datetime.datetime] = None
- self.origin: Optional[Origin] = None
-
- if not hasattr(self, "visit_type"):
- self.visit_type: Optional[str] = None
-
- self.origin_metadata: Dict[str, Any] = {}
- self.loaded_snapshot_id: Optional[Sha1Git] = None
-
- if save_data_path:
- path = save_data_path
- os.stat(path)
- if not os.access(path, os.R_OK | os.W_OK):
- raise PermissionError("Permission denied: %r" % path)
-
- self.save_data_path = save_data_path
-
def save_data(self) -> None:
"""Save the data associated to the current load"""
raise NotImplementedError
@@ -202,7 +166,7 @@
"""
raise NotImplementedError
- def prepare_origin_visit(self, *args, **kwargs) -> None:
+ def prepare_origin_visit(self) -> None:
"""First step executed by the loader to prepare origin and visit
references. Set/update self.origin, and
optionally self.origin_url, self.visit_date.
@@ -233,7 +197,7 @@
)
)[0]
- def prepare(self, *args, **kwargs) -> None:
+ def prepare(self) -> None:
"""Second step executed by the loader to prepare some state needed by
the loader.
@@ -323,7 +287,7 @@
"""
pass
- def load(self, *args, **kwargs) -> Dict[str, str]:
+ def load(self) -> Dict[str, str]:
r"""Loading logic for the loader to follow:
- 1. Call :meth:`prepare_origin_visit` to prepare the
@@ -347,7 +311,7 @@
msg = "Cleaning up dangling data failed! Continue loading."
self.log.warning(msg)
- self.prepare_origin_visit(*args, **kwargs)
+ self.prepare_origin_visit()
self._store_origin_visit()
assert (
@@ -361,7 +325,7 @@
)
try:
- self.prepare(*args, **kwargs)
+ self.prepare()
while True:
more_data_to_fetch = self.fetch_data()
@@ -391,7 +355,7 @@
self.log.exception(
"Loading failure, updating to `%s` status",
status,
- extra={"swh_task_args": args, "swh_task_kwargs": kwargs,},
+ extra={"swh_task_args": [], "swh_task_kwargs": {"origin": self.origin.url},},
)
visit_status = OriginVisitStatus(
origin=self.origin.url,
diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -28,7 +28,7 @@
import sentry_sdk
from swh.core.tarball import uncompress
-from swh.loader.core.loader import Loader
+from swh.loader.core.loader import BaseLoader
from swh.loader.exception import NotFound
from swh.loader.package.utils import download
from swh.model import from_disk
@@ -117,9 +117,10 @@
TPackageInfo = TypeVar("TPackageInfo", bound=BasePackageInfo)
-class PackageLoader(Loader, Generic[TPackageInfo]):
+class PackageLoader(BaseLoader, Generic[TPackageInfo]):
# Origin visit type (str) set by the loader
visit_type = ""
+ visit_date: datetime.datetime
def __init__(
self,
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jan 23, 2:35 AM (1 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216603
Attached To
D5088: core.loader: Merge Loader into BaseLoader
Event Timeline
Log In to Comment