diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py --- a/swh/loader/svn/loader.py +++ b/swh/loader/svn/loader.py @@ -69,12 +69,20 @@ visit_date: Optional[datetime] = None, destination_path: Optional[str] = None, swh_revision: Optional[str] = None, - start_from_scratch: bool = False, + ignore_history: bool = False, temp_directory: str = "/tmp", debug: bool = False, check_revision: int = 0, max_content_size: Optional[int] = None, ): + """Load an svn repository. + + Args: + ... + ignore_history: If True, as opposed to start from the last snapshot (if + any), starts systematically from the initial commit of the repository. + + """ super().__init__( storage=storage, logging_class="swh.loader.svn.SvnLoader", @@ -102,7 +110,7 @@ self._load_status = "uneventful" self.visit_date = visit_date self.destination_path = destination_path - self.start_from_scratch = start_from_scratch + self.ignore_history = ignore_history self.snapshot = None # state from previous visit self.latest_snapshot = None @@ -225,12 +233,13 @@ return swh_revision_id == revision_id def start_from( - self, start_from_scratch: bool = False + self, ignore_history: bool = False ) -> Tuple[int, int, Dict[int, Tuple[bytes, ...]]]: """Determine from where to start the loading. Args: - start_from_scratch: As opposed to start from the last snapshot + ignore_history: As opposed to start from the last snapshot, starts + systematically from the initial commit. 
Returns: tuple (revision_start, revision_end, revision_parents) @@ -254,7 +263,7 @@ revision_parents: Dict[int, Tuple[bytes, ...]] = {revision_start: ()} # start from a previous revision if any - if not start_from_scratch and self.latest_revision is not None: + if not ignore_history and self.latest_revision is not None: extra_headers = dict(self.latest_revision.extra_headers) revision_start = int(extra_headers[b"svn_revision"]) revision_parents = { @@ -416,7 +425,7 @@ try: revision_start, revision_end, revision_parents = self.start_from( - self.start_from_scratch + self.ignore_history ) self.swh_revision_gen = self.process_svn_revisions( self.svnrepo, revision_start, revision_end, revision_parents @@ -552,7 +561,7 @@ origin_url: Optional[str] = None, destination_path: Optional[str] = None, swh_revision: Optional[str] = None, - start_from_scratch: bool = False, + ignore_history: bool = False, visit_date: Optional[datetime] = None, temp_directory: str = "/tmp", debug: bool = False, @@ -565,7 +574,7 @@ origin_url=origin_url, destination_path=destination_path, swh_revision=swh_revision, - start_from_scratch=start_from_scratch, + ignore_history=ignore_history, visit_date=visit_date, temp_directory=temp_directory, debug=debug, @@ -612,7 +621,7 @@ origin_url: Optional[str] = None, destination_path: Optional[str] = None, swh_revision: Optional[str] = None, - start_from_scratch: bool = False, + ignore_history: bool = False, visit_date: Optional[datetime] = None, temp_directory: str = "/tmp", debug: bool = False, @@ -625,7 +634,7 @@ origin_url=origin_url, destination_path=destination_path, swh_revision=swh_revision, - start_from_scratch=start_from_scratch, + ignore_history=ignore_history, visit_date=visit_date, temp_directory=temp_directory, debug=debug, diff --git a/swh/loader/svn/tasks.py b/swh/loader/svn/tasks.py --- a/swh/loader/svn/tasks.py +++ b/swh/loader/svn/tasks.py @@ -27,22 +27,21 @@ destination_path: Optional[str] = None, swh_revision: Optional[str] = None, 
visit_date: Optional[str] = None, - start_from_scratch: Optional[bool] = False, + ignore_history: Optional[bool] = False, ): """Import a svn repository Args: - - url: (mandatory) svn's repository url to ingest data from - - origin_url: Optional original url override to use as origin reference - in the archive. If not provided, "url" is used as origin. - - destination_path: (optional) root directory to - locally retrieve svn's data - - swh_revision: (optional) extra revision hex to - start from. See swh.loader.svn.SvnLoader.process - docstring - - visit_date: Optional date to override the visit date - - start_from_scratch: Flag to allow starting back the svn repository from the - start + url: (mandatory) svn's repository url to ingest data from + origin_url: Optional original url override to use as origin reference in the + archive. If not provided, "url" is used as origin. + destination_path: (optional) root directory to locally retrieve svn's data + swh_revision: (optional) extra revision hex to start from. See + swh.loader.svn.SvnLoader.process docstring + visit_date: Optional date to override the visit date + ignore_history: If True, as opposed to start from the last snapshot (if + any), starts systematically from the initial commit of the repository. + """ loader = SvnLoader.from_configfile( @@ -51,7 +50,7 @@ destination_path=destination_path, swh_revision=swh_revision, visit_date=convert_to_datetime(visit_date), - start_from_scratch=start_from_scratch, + ignore_history=ignore_history, ) return loader.load() @@ -62,25 +61,25 @@ url: Optional[str] = None, archive_path: Optional[str] = None, visit_date: Optional[str] = None, - start_from_scratch: Optional[bool] = False, + ignore_history: Optional[bool] = False, ): """1. Mount an svn dump from archive as a local svn repository 2. Load it through the svn loader 3. 
Clean up mounted svn repository archive Args: - - url: origin url - - archive_path: Path on disk to the archive holdin the svn repository to ingest - - visit_date: Optional date to override the visit date - - start_from_scratch: Flag to allow starting back the svn repository from the - start + url: origin url + archive_path: Path on disk to the archive holding the svn repository to ingest + visit_date: Optional date to override the visit date + ignore_history: If True, as opposed to start from the last snapshot (if + any), starts systematically from the initial commit of the repository. """ loader = SvnLoaderFromDumpArchive.from_configfile( url=url, archive_path=archive_path, visit_date=convert_to_datetime(visit_date), - start_from_scratch=start_from_scratch, + ignore_history=ignore_history, ) return loader.load() @@ -91,25 +90,25 @@ url: Optional[str] = None, origin_url: Optional[str] = None, visit_date: Optional[str] = None, - start_from_scratch: Optional[bool] = False, + ignore_history: Optional[bool] = False, ): """1. Mount a remote svn dump as a local svn repository. 2. Load it through the svn loader. 3. Clean up mounted svn repository archive. Args: - - url: (mandatory) svn's repository url to ingest data from - - origin_url: Optional original url override to use as origin reference + url: (mandatory) svn's repository url to ingest data from + origin_url: Optional original url override to use as origin reference in the archive. If not provided, "url" is used as origin. - - visit_date: Optional date to override the visit date - - start_from_scratch: Flag to allow starting back the svn repository from the - start + visit_date: Optional date to override the visit date + ignore_history: If True, as opposed to start from the last snapshot (if + any), starts systematically from the initial commit of the repository. 
""" loader = SvnLoaderFromRemoteDump.from_configfile( url=url, origin_url=origin_url, visit_date=convert_to_datetime(visit_date), - start_from_scratch=start_from_scratch, + ignore_history=ignore_history, ) return loader.load() diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -346,10 +346,7 @@ # Start from scratch loading yields the same result loader = SvnLoader( - swh_storage, - repo_updated_url, - origin_url=repo_initial_url, - start_from_scratch=True, + swh_storage, repo_updated_url, origin_url=repo_initial_url, ignore_history=True, ) assert loader.load() == {"status": "eventful"} visit_status3 = assert_last_visit_matches(