Page MenuHomeSoftware Heritage

D6392.diff
No OneTemporary

D6392.diff

diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py
--- a/swh/loader/git/loader.py
+++ b/swh/loader/git/loader.py
@@ -97,21 +97,42 @@
for name, value in refs.items():
heads_logger.debug(" %r: %s", name, value.decode())
- # Get the remote heads that we want to fetch
- remote_heads: Set[HexBytes] = set()
+ # specific set of objects to sort
+ tag_names = set()
+ branch_names = set()
+ # remote heads is just all refs without order
+ remote_heads = set()
+ target_to_ref = defaultdict(list)
for ref_name, ref_target in refs.items():
- if utils.ignore_branch_name(ref_name):
+ # Ignore either usual branch to ignore or known references
+ if utils.ignore_branch_name(ref_name) or ref_target in self.local_heads:
continue
- remote_heads.add(ref_target)
+ # Then we'll sort out the tags from the branches
+ if ref_name.startswith(b"refs/tags/"):
+ tag_names.add(ref_name)
+ else:
+ branch_names.add(ref_name)
- if heads_logger.isEnabledFor(logging.DEBUG):
- heads_logger.debug("Filtered remote heads:")
- for value in remote_heads:
- heads_logger.debug(" %s", value.decode())
+ remote_heads.add(ref_target)
+ target_to_ref[ref_target].append(ref_name)
logger.debug("local_heads_count=%s", len(self.local_heads))
logger.debug("remote_heads_count=%s", len(remote_heads))
- wanted_refs = list(remote_heads - self.local_heads)
+
+ # Then we sort the refs (by tags then by branches) so it's mostly ingested in
+ # lexicographic order (provided there is some consistency there)
+ tags = [refs[ref_name] for ref_name in sorted(tag_names)]
+ branches = [refs[ref_name] for ref_name in sorted(branch_names)]
+ # The wanted refs is the concatenation first tags then branches references
+ wanted_refs = tags + branches
+
+ if heads_logger.isEnabledFor(logging.DEBUG):
+ heads_logger.debug("Ordered wanted heads returned by the git remote:")
+ for ref_target in wanted_refs:
+ heads_logger.debug(
+ " %r: %s", target_to_ref[ref_target], ref_target.decode()
+ )
+
logger.debug("wanted_refs_count=%s", len(wanted_refs))
if self.statsd is not None:
self.statsd.histogram(
@@ -119,9 +140,14 @@
len(remote_heads - set(refs.values())) / len(refs),
tags={},
)
+ git_known_refs_percent = (
+ len(self.local_heads & remote_heads) / len(remote_heads)
+ if remote_heads
+ else 0
+ )
self.statsd.histogram(
"git_known_refs_percent",
- len(self.local_heads & remote_heads) / len(remote_heads),
+ git_known_refs_percent,
tags={},
)
return wanted_refs
diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py
--- a/swh/loader/git/tests/test_loader.py
+++ b/swh/loader/git/tests/test_loader.py
@@ -452,7 +452,7 @@
assert [c for c in statsd_report.mock_calls if c[1][0].startswith("git_")] == [
call("git_total", "c", 1, {}, 1),
call("git_ignored_refs_percent", "h", 0.0, {}, 1),
- call("git_known_refs_percent", "h", 0.25, {}, 1),
+ call("git_known_refs_percent", "h", 0.0, {}, 1),
]
assert self.loader.statsd.constant_tags == {
"visit_type": "git",
@@ -512,7 +512,7 @@
assert [c for c in statsd_report.mock_calls if c[1][0].startswith("git_")] == [
call("git_total", "c", 1, {}, 1),
call("git_ignored_refs_percent", "h", 0.0, {}, 1),
- call("git_known_refs_percent", "h", 1.0, {}, 1),
+ call("git_known_refs_percent", "h", 0.0, {}, 1),
]
assert self.loader.statsd.constant_tags == {
"visit_type": "git",
@@ -532,7 +532,7 @@
}
),
Snapshot(branches={}),
- 0.25,
+ 0.0,
id="partial-parent-and-empty-previous",
),
pytest.param(
@@ -542,7 +542,7 @@
b"refs/heads/master": SNAPSHOT1.branches[b"refs/heads/master"]
}
),
- 1.0,
+ 0.0,
id="full-parent-and-partial-previous",
),
],

File Metadata

Mime Type
text/plain
Expires
Dec 17 2024, 7:35 AM (4 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218434

Event Timeline