# Patch summary (review notes only; the patch text below is left untouched).
#
# swh/loader/git/loader.py
#   - Drops the `ObjectStoreGraphWalker` import together with the
#     `get_parents()` and `graph_walker()` methods that built one from
#     `self.heads`.
#   - The object that owns `determine_wants()` now plays the graph-walker role
#     itself: `self.__iterator` starts as None, `__next__()` creates
#     `iter(self.heads)` on first use and returns `next(self.__iterator, None)`,
#     i.e. None once the heads are exhausted; `ack(sha)` does nothing.
#   - `client.fetch_pack(...)` is handed `base_repo` directly instead of
#     `base_repo.graph_walker()`.
#
# swh/loader/git/tests/test_loader.py
#   - Adds `test_load_incremental_negotiation`. Storage lookups are stubbed:
#     `origin_visit_get_latest` (via the `ovgl` side effect) returns a visit
#     only for the `base://` origin URL, `origin_visit_status_get_latest`
#     returns a "full" status pointing at `snapshot_id`, and
#     `snapshot_get_branches` returns a single `refs/heads/master` revision
#     branch. The test then expects `load()` to return {"status": "eventful"}
#     and the storage stats to show 3 contents, 6 directories and 6 revisions.
#
# NOTE(review): `if not self.__iterator:` relies on truthiness of the stored
#   iterator; `is None` would state the intent more directly -- confirm and
#   adjust in the real file if desired.
# NOTE(review): `ovgl` takes a parameter named `type`, which shadows the
#   builtin; presumably the name must match the keyword used by the stubbed
#   storage method -- verify against the caller before renaming.
# NOTE(review): the line breaks of this patch appear to have been collapsed
#   (hunk bodies run on after their `@@` headers); it likely needs its original
#   line structure restored before it can be applied -- TODO confirm.
diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py --- a/swh/loader/git/loader.py +++ b/swh/loader/git/loader.py @@ -14,7 +14,6 @@ import dulwich.client from dulwich.errors import GitProtocolError, NotGitRepository -from dulwich.object_store import ObjectStoreGraphWalker from dulwich.objects import ShaFile from dulwich.pack import PackData, PackInflater @@ -61,13 +60,7 @@ self.base_snapshots = [] self.heads: Set[HexBytes] = set() - - def get_parents(self, commit: bytes) -> List[bytes]: - """This method should return the list of known parents""" - return [] - - def graph_walker(self) -> ObjectStoreGraphWalker: - return ObjectStoreGraphWalker(self.heads, self.get_parents) + self.__iterator = None def determine_wants(self, refs: Dict[bytes, HexBytes]) -> List[HexBytes]: """Get the list of bytehex sha1s that the git loader should fetch. @@ -120,6 +113,15 @@ ) return wanted_refs + # Compatibility with the dulwich graph walker + def __next__(self): + if not self.__iterator: + self.__iterator = iter(self.heads) + return next(self.__iterator, None) + + def ack(self, sha): + pass + @dataclass class FetchPackReturn: @@ -223,7 +225,7 @@ pack_result = client.fetch_pack( path, base_repo.determine_wants, - base_repo.graph_walker(), + base_repo, do_pack, progress=do_activity, ) diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py --- a/swh/loader/git/tests/test_loader.py +++ b/swh/loader/git/tests/test_loader.py @@ -557,6 +557,56 @@ call("git_known_refs_percent", "h", expected_git_known_refs_percent, {}, 1), ] + def test_load_incremental_negotiation(self): + """Check that the packfile negotiated when running an incremental load only + contains the "new" commits, and not all objects.""" + + snapshot_id = b"\x01" * 20 + now = datetime.datetime.now(tz=datetime.timezone.utc) + + def ovgl(origin_url, allowed_statuses, require_snapshot, type): + if origin_url == f"base://{self.repo_url}": + return OriginVisit(origin=origin_url, visit=42, 
date=now, type="git") + else: + return None + + self.loader.storage.origin_visit_get_latest.side_effect = ovgl + self.loader.storage.origin_visit_status_get_latest.return_value = ( + OriginVisitStatus( + origin=f"base://{self.repo_url}", + visit=42, + snapshot=snapshot_id, + date=now, + status="full", + ) + ) + self.loader.storage.snapshot_get_branches.return_value = { + "id": snapshot_id, + "branches": { + b"refs/heads/master": SnapshotBranch( + # id of the initial commit in the git repository fixture + target=b"\xb6\xf4\x02\x92\xc4\xe9J\x8f~{J\xffP\xe6\xc7B\x9a\xb9\x8e*", + target_type=TargetType.REVISION, + ), + }, + "next_branch": None, + } + + res = self.loader.load() + assert res == {"status": "eventful"} + + stats = get_stats(self.loader.storage) + assert stats == { + "content": 3, # instead of 4 for the full repository + "directory": 6, # instead of 7 + "origin": 1, + "origin_visit": 1, + "release": 0, + "revision": 6, # instead of 7 + "skipped_content": 0, + "snapshot": 1, + } + class DumbGitLoaderTestBase(FullGitLoaderTests): """Prepare a git repository to be loaded using the HTTP dumb transfer protocol."""