diff --git a/swh/loader/git/base.py b/swh/loader/git/base.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/git/base.py
@@ -0,0 +1,111 @@
+# Copyright (C) 2015-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Iterable
+
+from swh.loader.core.loader import BaseLoader
+from swh.model.model import (
+    BaseContent,
+    Content,
+    Directory,
+    Release,
+    Revision,
+    SkippedContent,
+    Snapshot,
+)
+
+
+class BaseGitLoader(BaseLoader):
+    """Common base class for both git loaders.
+
+    Those loaders are able to load all the data in one go: subclasses provide
+    the objects through the ``get_*`` methods and :meth:`store_data` sends
+    them to ``self.storage``.
+    """
+
+    def cleanup(self) -> None:
+        """Clean up any state installed for computations (no-op here)."""
+        pass
+
+    def has_contents(self) -> bool:
+        """Check whether we need to load contents (always true here)."""
+        return True
+
+    def get_contents(self) -> Iterable[BaseContent]:
+        """Get the contents that need to be loaded (not implemented here)."""
+        raise NotImplementedError
+
+    def has_directories(self) -> bool:
+        """Check whether we need to load directories (always true here)."""
+        return True
+
+    def get_directories(self) -> Iterable[Directory]:
+        """Get the directories that need to be loaded (not implemented here)."""
+        raise NotImplementedError
+
+    def has_revisions(self) -> bool:
+        """Check whether we need to load revisions (always true here)."""
+        return True
+
+    def get_revisions(self) -> Iterable[Revision]:
+        """Get the revisions that need to be loaded (not implemented here)."""
+        raise NotImplementedError
+
+    def has_releases(self) -> bool:
+        """Check whether we need to load releases (always true here)."""
+        return True
+
+    def get_releases(self) -> Iterable[Release]:
+        """Get the releases that need to be loaded (not implemented here)."""
+        raise NotImplementedError
+
+    def get_snapshot(self) -> Snapshot:
+        """Get the snapshot that needs to be loaded (not implemented here)."""
+        raise NotImplementedError
+
+    def eventful(self) -> bool:
+        """Whether the load was eventful (not implemented here)."""
+        raise NotImplementedError
+
+    def store_data(self) -> None:
+        """Send every object to load to the storage, then flush.
+
+        Objects are added in this order: contents, directories, revisions,
+        releases and finally the snapshot. Each ``has_*`` check gates the
+        iteration of the matching ``get_*`` method.
+
+        Raises:
+            TypeError: if :meth:`get_contents` yields an object that is
+                neither a Content nor a SkippedContent.
+        """
+        assert self.origin
+        if self.save_data_path:
+            # save_data is not defined in this file; presumably inherited
+            # from BaseLoader -- TODO confirm
+            self.save_data()
+
+        if self.has_contents():
+            for obj in self.get_contents():
+                # Content and SkippedContent go through different storage methods
+                if isinstance(obj, Content):
+                    self.storage.content_add([obj])
+                elif isinstance(obj, SkippedContent):
+                    self.storage.skipped_content_add([obj])
+                else:
+                    raise TypeError(f"Unexpected content type: {obj}")
+        if self.has_directories():
+            for directory in self.get_directories():
+                self.storage.directory_add([directory])
+        if self.has_revisions():
+            for revision in self.get_revisions():
+                self.storage.revision_add([revision])
+        if self.has_releases():
+            for release in self.get_releases():
+                self.storage.release_add([release])
+        snapshot = self.get_snapshot()
+        self.storage.snapshot_add([snapshot])
+        self.flush()
+        # Remember the id of the snapshot that was just sent to the storage
+        self.loaded_snapshot_id = snapshot.id
diff --git a/swh/loader/git/from_disk.py b/swh/loader/git/from_disk.py
--- a/swh/loader/git/from_disk.py
+++ b/swh/loader/git/from_disk.py
@@ -21,13 +21,13 @@
 import dulwich.objects
 import dulwich.repo
 
-from swh.loader.core.loader import DVCSLoader
 from swh.model import hashutil
 from swh.model.model import Snapshot, SnapshotBranch, TargetType
 from swh.storage.algos.origin import origin_get_latest_visit_status
 from swh.storage.interface import StorageInterface
 
 from . import converters, utils
+from .base import BaseGitLoader
 
 logger = logging.getLogger(__name__)
 
@@ -87,7 +87,7 @@
 
 
 @deprecated(version="1.1", reason="Use `swh.loader.git.loader.GitLoader` instead")
-class GitLoaderFromDisk(DVCSLoader):
+class GitLoaderFromDisk(BaseGitLoader):
     """Load a git repository from a directory."""
 
     visit_type = "git"
diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py
--- a/swh/loader/git/loader.py
+++ b/swh/loader/git/loader.py
@@ -19,7 +19,6 @@
 from dulwich.pack import PackData, PackInflater
 
 from swh.core.statsd import Statsd
-from swh.loader.core.loader import DVCSLoader
 from swh.loader.exception import NotFound
 from swh.model import hashutil
 from swh.model.model import (
@@ -35,6 +34,7 @@
 from swh.storage.interface import StorageInterface
 
 from . import converters, dumb, utils
+from .base import BaseGitLoader
 from .utils import HexBytes
 
 logger = logging.getLogger(__name__)
@@ -121,7 +121,7 @@
     pack_size: int
 
 
-class GitLoader(DVCSLoader):
+class GitLoader(BaseGitLoader):
     """A bulk loader for a git repository
 
     Emits the following statsd stats: