diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -2,4 +2,4 @@ swh.loader.core >= 0.5.9 swh.model >= 0.4.0 swh.scheduler >= 0.0.39 -swh.storage >= 0.10.0 +swh.storage >= 0.11.3 diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py --- a/swh/loader/git/loader.py +++ b/swh/loader/git/loader.py @@ -215,19 +215,15 @@ def prepare(self, *args, **kwargs) -> None: assert self.origin is not None - base_origin_url = origin_url = self.origin.url - prev_snapshot: Optional[Snapshot] = None if not self.ignore_history: - prev_snapshot = self.get_full_snapshot(origin_url) + prev_snapshot = self.get_full_snapshot(self.origin.url) if self.base_url and prev_snapshot is None: - base_origin = Origin(url=self.base_url) - base_origin = self.storage.origin_get(base_origin) + base_origin = self.storage.origin_get([self.base_url])[0] if base_origin: - base_origin_url = base_origin.url - prev_snapshot = self.get_full_snapshot(base_origin_url) + prev_snapshot = self.get_full_snapshot(base_origin.url) if prev_snapshot is not None: self.base_snapshot = prev_snapshot diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py --- a/swh/loader/git/tests/test_loader.py +++ b/swh/loader/git/tests/test_loader.py @@ -34,3 +34,24 @@ self.destination_path = os.path.join(tmp_path, archive_name) self.loader = GitLoader(self.repo_url) self.repo = dulwich.repo.Repo(self.destination_path) + + +class GitLoader2Test(TestCase, FullGitLoaderTests): + """Mostly the same loading scenario, but with a base-url different from the repo-url. This walks a slightly different code path; the end result should stay the same. 
+ + """ + + @pytest.fixture(autouse=True) + def init(self, swh_config, datadir, tmp_path): + super().setUp() + archive_name = "testrepo" + archive_path = os.path.join(datadir, f"{archive_name}.tgz") + tmp_path = str(tmp_path) + self.repo_url = prepare_repository_from_archive( + archive_path, archive_name, tmp_path=tmp_path + ) + self.destination_path = os.path.join(tmp_path, archive_name) + base_url = f"base://{self.repo_url}" + self.loader = GitLoader(self.repo_url, base_url=base_url) + self.repo = dulwich.repo.Repo(self.destination_path)