diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py --- a/swh/loader/svn/loader.py +++ b/swh/loader/svn/loader.py @@ -69,12 +69,20 @@ visit_date: Optional[datetime] = None, destination_path: Optional[str] = None, swh_revision: Optional[str] = None, - start_from_scratch: bool = False, + incremental: bool = True, temp_directory: str = "/tmp", debug: bool = False, check_revision: int = 0, max_content_size: Optional[int] = None, ): + """Load an svn repository. + + Args: + ... + incremental: If True, the default, starts from the last snapshot (if any). + Otherwise, starts from the initial commit of the repository. + + """ super().__init__( storage=storage, logging_class="swh.loader.svn.SvnLoader", @@ -102,7 +110,7 @@ self._load_status = "uneventful" self.visit_date = visit_date self.destination_path = destination_path - self.start_from_scratch = start_from_scratch + self.incremental = incremental self.snapshot = None # state from previous visit self.latest_snapshot = None @@ -224,14 +232,9 @@ swh_revision_id = swh_revision.id return swh_revision_id == revision_id - def start_from( - self, start_from_scratch: bool = False - ) -> Tuple[int, int, Dict[int, Tuple[bytes, ...]]]: + def start_from(self) -> Tuple[int, int, Dict[int, Tuple[bytes, ...]]]: """Determine from where to start the loading. 
- Args: - start_from_scratch: As opposed to start from the last snapshot - Returns: tuple (revision_start, revision_end, revision_parents) @@ -254,7 +257,7 @@ revision_parents: Dict[int, Tuple[bytes, ...]] = {revision_start: ()} # start from a previous revision if any - if not start_from_scratch and self.latest_revision is not None: + if self.incremental and self.latest_revision is not None: extra_headers = dict(self.latest_revision.extra_headers) revision_start = int(extra_headers[b"svn_revision"]) revision_parents = { @@ -415,9 +418,7 @@ raise try: - revision_start, revision_end, revision_parents = self.start_from( - self.start_from_scratch - ) + revision_start, revision_end, revision_parents = self.start_from() self.swh_revision_gen = self.process_svn_revisions( self.svnrepo, revision_start, revision_end, revision_parents ) @@ -552,7 +553,7 @@ origin_url: Optional[str] = None, destination_path: Optional[str] = None, swh_revision: Optional[str] = None, - start_from_scratch: bool = False, + incremental: bool = False, visit_date: Optional[datetime] = None, temp_directory: str = "/tmp", debug: bool = False, @@ -565,7 +566,7 @@ origin_url=origin_url, destination_path=destination_path, swh_revision=swh_revision, - start_from_scratch=start_from_scratch, + incremental=incremental, visit_date=visit_date, temp_directory=temp_directory, debug=debug, @@ -612,7 +613,7 @@ origin_url: Optional[str] = None, destination_path: Optional[str] = None, swh_revision: Optional[str] = None, - start_from_scratch: bool = False, + incremental: bool = True, visit_date: Optional[datetime] = None, temp_directory: str = "/tmp", debug: bool = False, @@ -625,7 +626,7 @@ origin_url=origin_url, destination_path=destination_path, swh_revision=swh_revision, - start_from_scratch=start_from_scratch, + incremental=incremental, visit_date=visit_date, temp_directory=temp_directory, debug=debug, diff --git a/swh/loader/svn/tasks.py b/swh/loader/svn/tasks.py --- a/swh/loader/svn/tasks.py +++ 
b/swh/loader/svn/tasks.py @@ -27,22 +27,22 @@ destination_path: Optional[str] = None, swh_revision: Optional[str] = None, visit_date: Optional[str] = None, - start_from_scratch: Optional[bool] = False, + incremental: Optional[bool] = True, ): """Import a svn repository Args: - - url: (mandatory) svn's repository url to ingest data from - - origin_url: Optional original url override to use as origin reference - in the archive. If not provided, "url" is used as origin. - - destination_path: (optional) root directory to - locally retrieve svn's data - - swh_revision: (optional) extra revision hex to - start from. See swh.loader.svn.SvnLoader.process - docstring - - visit_date: Optional date to override the visit date - - start_from_scratch: Flag to allow starting back the svn repository from the - start + url: (mandatory) svn's repository url to ingest data from + origin_url: Optional original url override to use as origin reference in the + archive. If not provided, "url" is used as origin. + destination_path: (optional) root directory to locally retrieve svn's data + swh_revision: (optional) extra revision hex to start from. See + swh.loader.svn.SvnLoader.process docstring + visit_date: Optional date to override the visit date + incremental: If True, the default, starts from the last snapshot (if any). + Otherwise, starts from the initial commit of the repository. + + """ loader = SvnLoader.from_configfile( @@ -51,7 +51,7 @@ destination_path=destination_path, swh_revision=swh_revision, visit_date=convert_to_datetime(visit_date), - start_from_scratch=start_from_scratch, + incremental=incremental, ) return loader.load() @@ -62,25 +62,25 @@ url: Optional[str] = None, archive_path: Optional[str] = None, visit_date: Optional[str] = None, - start_from_scratch: Optional[bool] = False, + incremental: Optional[bool] = True, ): """1. Mount an svn dump from archive as a local svn repository 2. Load it through the svn loader 3. 
Clean up mounted svn repository archive Args: - - url: origin url - - archive_path: Path on disk to the archive holdin the svn repository to ingest - - visit_date: Optional date to override the visit date - - start_from_scratch: Flag to allow starting back the svn repository from the - start + url: origin url + archive_path: Path on disk to the archive holding the svn repository to ingest + visit_date: Optional date to override the visit date + incremental: If True, the default, starts from the last snapshot (if any). + Otherwise, starts from the initial commit of the repository. """ loader = SvnLoaderFromDumpArchive.from_configfile( url=url, archive_path=archive_path, visit_date=convert_to_datetime(visit_date), - start_from_scratch=start_from_scratch, + incremental=incremental, ) return loader.load() @@ -91,25 +91,25 @@ url: Optional[str] = None, origin_url: Optional[str] = None, visit_date: Optional[str] = None, - start_from_scratch: Optional[bool] = False, + incremental: Optional[bool] = True, ): """1. Mount a remote svn dump as a local svn repository. 2. Load it through the svn loader. 3. Clean up mounted svn repository archive. Args: - - url: (mandatory) svn's repository url to ingest data from - - origin_url: Optional original url override to use as origin reference + url: (mandatory) svn's repository url to ingest data from + origin_url: Optional original url override to use as origin reference in the archive. If not provided, "url" is used as origin. - - visit_date: Optional date to override the visit date - - start_from_scratch: Flag to allow starting back the svn repository from the - start + visit_date: Optional date to override the visit date + incremental: If True, the default, starts from the last snapshot (if any). + Otherwise, starts from the initial commit of the repository. 
""" loader = SvnLoaderFromRemoteDump.from_configfile( url=url, origin_url=origin_url, visit_date=convert_to_datetime(visit_date), - start_from_scratch=start_from_scratch, + incremental=incremental, ) return loader.load() diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -343,13 +343,9 @@ check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage) - # Start from scratch loading yields the same result - + # Let's start the ingestion from the start, this should yield the same result loader = SvnLoader( - swh_storage, - repo_updated_url, - origin_url=repo_initial_url, - start_from_scratch=True, + swh_storage, repo_updated_url, origin_url=repo_initial_url, incremental=False, ) assert loader.load() == {"status": "eventful"} visit_status3 = assert_last_visit_matches(