Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7122849
D6392.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D6392.diff
View Options
diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py
--- a/swh/loader/git/loader.py
+++ b/swh/loader/git/loader.py
@@ -97,21 +97,42 @@
for name, value in refs.items():
heads_logger.debug(" %r: %s", name, value.decode())
- # Get the remote heads that we want to fetch
- remote_heads: Set[HexBytes] = set()
+ # Ref names, split into tags and branches so each group can be sorted
+ tag_names = set()
+ branch_names = set()
+ # remote_heads is the unordered set of all ref targets that were kept
+ remote_heads = set()
+ target_to_ref = defaultdict(list)
for ref_name, ref_target in refs.items():
- if utils.ignore_branch_name(ref_name):
+ # Skip refs whose name is ignored or whose target is already a known head
+ if utils.ignore_branch_name(ref_name) or ref_target in self.local_heads:
continue
- remote_heads.add(ref_target)
+ # Then we'll sort out the tags from the branches
+ if ref_name.startswith(b"refs/tags/"):
+ tag_names.add(ref_name)
+ else:
+ branch_names.add(ref_name)
- if heads_logger.isEnabledFor(logging.DEBUG):
- heads_logger.debug("Filtered remote heads:")
- for value in remote_heads:
- heads_logger.debug(" %s", value.decode())
+ remote_heads.add(ref_target)
+ target_to_ref[ref_target].append(ref_name)
logger.debug("local_heads_count=%s", len(self.local_heads))
logger.debug("remote_heads_count=%s", len(remote_heads))
- wanted_refs = list(remote_heads - self.local_heads)
+
+ # Then sort the refs by name (tags first, then branches) so they are mostly
+ # ingested in lexicographic order (provided the ref names are consistent)
+ tags = [refs[ref_name] for ref_name in sorted(tag_names)]
+ branches = [refs[ref_name] for ref_name in sorted(branch_names)]
+ # The wanted refs are the tag targets followed by the branch targets
+ wanted_refs = tags + branches
+
+ if heads_logger.isEnabledFor(logging.DEBUG):
+ heads_logger.debug("Ordered wanted heads returned by the git remote:")
+ for ref_target in wanted_refs:
+ heads_logger.debug(
+ " %r: %s", target_to_ref[ref_target], ref_target.decode()
+ )
+
logger.debug("wanted_refs_count=%s", len(wanted_refs))
if self.statsd is not None:
self.statsd.histogram(
@@ -119,9 +140,14 @@
len(remote_heads - set(refs.values())) / len(refs),
tags={},
)
+ git_known_refs_percent = (
+ len(self.local_heads & remote_heads) / len(remote_heads)
+ if remote_heads
+ else 0
+ )
self.statsd.histogram(
"git_known_refs_percent",
- len(self.local_heads & remote_heads) / len(remote_heads),
+ git_known_refs_percent,
tags={},
)
return wanted_refs
diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py
--- a/swh/loader/git/tests/test_loader.py
+++ b/swh/loader/git/tests/test_loader.py
@@ -452,7 +452,7 @@
assert [c for c in statsd_report.mock_calls if c[1][0].startswith("git_")] == [
call("git_total", "c", 1, {}, 1),
call("git_ignored_refs_percent", "h", 0.0, {}, 1),
- call("git_known_refs_percent", "h", 0.25, {}, 1),
+ call("git_known_refs_percent", "h", 0.0, {}, 1),
]
assert self.loader.statsd.constant_tags == {
"visit_type": "git",
@@ -512,7 +512,7 @@
assert [c for c in statsd_report.mock_calls if c[1][0].startswith("git_")] == [
call("git_total", "c", 1, {}, 1),
call("git_ignored_refs_percent", "h", 0.0, {}, 1),
- call("git_known_refs_percent", "h", 1.0, {}, 1),
+ call("git_known_refs_percent", "h", 0.0, {}, 1),
]
assert self.loader.statsd.constant_tags == {
"visit_type": "git",
@@ -532,7 +532,7 @@
}
),
Snapshot(branches={}),
- 0.25,
+ 0.0,
id="partial-parent-and-empty-previous",
),
pytest.param(
@@ -542,7 +542,7 @@
b"refs/heads/master": SNAPSHOT1.branches[b"refs/heads/master"]
}
),
- 1.0,
+ 0.0,
id="full-parent-and-partial-previous",
),
],
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 17 2024, 7:35 AM (4 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218434
Attached To
D6392: git: Ingest ordered tags then ordered branches references
Event Timeline
Log In to Comment