diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,5 +1,5 @@ swh.core >= 0.0.7 -swh.loader.core >= 0.18.0 +swh.loader.core >= 3.0.0 swh.model >= 4.3.0 swh.scheduler >= 0.0.39 swh.storage >= 0.22.0 diff --git a/swh/loader/git/from_disk.py b/swh/loader/git/from_disk.py --- a/swh/loader/git/from_disk.py +++ b/swh/loader/git/from_disk.py @@ -23,7 +23,7 @@ from swh.loader.core.loader import DVCSLoader from swh.model import hashutil -from swh.model.model import Origin, Snapshot, SnapshotBranch, TargetType +from swh.model.model import Snapshot, SnapshotBranch, TargetType from swh.storage.algos.origin import origin_get_latest_visit_status from swh.storage.interface import StorageInterface @@ -98,21 +98,12 @@ url: str, visit_date: Optional[datetime] = None, directory: Optional[str] = None, - save_data_path: Optional[str] = None, - max_content_size: Optional[int] = None, + **kwargs, ): - super().__init__( - storage=storage, - save_data_path=save_data_path, - max_content_size=max_content_size, - ) - self.origin_url = url - self.visit_date = visit_date + super().__init__(storage=storage, origin_url=url, **kwargs) + self.visit_date = visit_date or self.visit_date self.directory = directory - def prepare_origin_visit(self): - self.origin = Origin(url=self.origin_url) - def prepare(self): self.repo = dulwich.repo.Repo(self.directory) @@ -215,7 +206,7 @@ def fetch_data(self): """Fetch the data from the data source""" visit_status = origin_get_latest_visit_status( - self.storage, self.origin_url, require_snapshot=True + self.storage, self.origin.url, require_snapshot=True ) self.previous_snapshot_id = ( None if visit_status is None else visit_status.snapshot @@ -338,7 +329,7 @@ branches[target] = None utils.warn_dangling_branches( - branches, dangling_branches, logger, self.origin_url + branches, dangling_branches, logger, self.origin.url ) self.snapshot = Snapshot(branches=branches) @@ -431,7 +422,7 @@ logger.info( "Project %s - Uncompressing archive %s at %s", - self.origin_url, + self.origin.url, os.path.basename(self.archive_path), self.repo_path, ) @@ -443,5 +434,5 @@ if self.temp_dir and os.path.exists(self.temp_dir): shutil.rmtree(self.temp_dir) logger.info( - "Project %s - Done injecting %s" % (self.origin_url, self.repo_path) + "Project %s - Done injecting %s" % (self.origin.url, self.repo_path) ) diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py --- a/swh/loader/git/loader.py +++ b/swh/loader/git/loader.py @@ -24,7 +24,6 @@ from swh.model.model import ( BaseContent, Directory, - Origin, Release, Revision, Snapshot, @@ -121,8 +120,7 @@ repo_representation: Type[RepoRepresentation] = RepoRepresentation, pack_size_bytes: int = 4 * 1024 * 1024 * 1024, temp_file_cutoff: int = 100 * 1024 * 1024, - save_data_path: Optional[str] = None, - max_content_size: Optional[int] = None, + **kwargs: Any, ): """Initialize the bulk updater. @@ -136,12 +134,7 @@ (if any) references. Otherwise, this loads the full repository. """ - super().__init__( - storage=storage, - save_data_path=save_data_path, - max_content_size=max_content_size, - ) - self.origin_url = url + super().__init__(storage=storage, origin_url=url, **kwargs) self.base_url = base_url self.incremental = incremental self.repo_representation = repo_representation @@ -234,10 +227,6 @@ pack_size=pack_size, ) - def prepare_origin_visit(self) -> None: - self.visit_date = datetime.datetime.now(tz=datetime.timezone.utc) - self.origin = Origin(url=self.origin_url) - def get_full_snapshot(self, origin_url) -> Optional[Snapshot]: return snapshot_get_latest(self.storage, origin_url) @@ -295,7 +284,7 @@ # by the fetch_pack operation when encountering a repository with # dumb transfer protocol so we check if the repository supports it # here to continue the loading if it is the case - self.dumb = dumb.check_protocol(self.origin_url) + self.dumb = dumb.check_protocol(self.origin.url) if not self.dumb: raise @@ -303,7 +292,7 @@ "Protocol used for communication: %s", "dumb" if self.dumb else "smart" ) if self.dumb: - self.dumb_fetcher = dumb.GitObjectsFetcher(self.origin_url, base_repo) + self.dumb_fetcher = dumb.GitObjectsFetcher(self.origin.url, base_repo) self.dumb_fetcher.fetch_object_ids() self.remote_refs = utils.filter_refs(self.dumb_fetcher.refs) # type: ignore self.symbolic_refs = self.dumb_fetcher.head @@ -483,7 +472,7 @@ ) utils.warn_dangling_branches( - branches, dangling_branches, logger, self.origin_url + branches, dangling_branches, logger, self.origin.url ) self.snapshot = Snapshot(branches=branches)