D7831: Use all base snapshots in determine_wants()
D7831.diff
diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py
--- a/swh/loader/git/loader.py
+++ b/swh/loader/git/loader.py
@@ -46,7 +46,7 @@
     def __init__(
         self,
         storage,
-        base_snapshot: Optional[Snapshot] = None,
+        base_snapshots: List[Snapshot] = None,
         incremental: bool = True,
         statsd: Statsd = None,
     ):
@@ -54,10 +54,10 @@
         self.incremental = incremental
         self.statsd = statsd
-        if base_snapshot and incremental:
-            self.base_snapshot: Snapshot = base_snapshot
+        if base_snapshots and incremental:
+            self.base_snapshots: List[Snapshot] = base_snapshots
         else:
-            self.base_snapshot = Snapshot(branches={})
+            self.base_snapshots = []
         self.heads: Set[HexBytes] = set()
@@ -80,10 +80,11 @@
         # Cache existing heads
         local_heads: Set[HexBytes] = set()
-        for branch_name, branch in self.base_snapshot.branches.items():
-            if not branch or branch.target_type == TargetType.ALIAS:
-                continue
-            local_heads.add(hashutil.hash_to_hex(branch.target).encode())
+        for base_snapshot in self.base_snapshots:
+            for branch_name, branch in base_snapshot.branches.items():
+                if not branch or branch.target_type == TargetType.ALIAS:
+                    continue
+                local_heads.add(hashutil.hash_to_hex(branch.target).encode())
         self.heads = local_heads
@@ -266,48 +267,44 @@
     def prepare(self) -> None:
         assert self.origin is not None
-        prev_snapshot: Optional[Snapshot] = None
+        self.prev_snapshot = Snapshot(branches={})
+        """Last snapshot of this origin if any; empty snapshot otherwise"""
+        self.base_snapshots = []
+        """Last snapshot of this origin and all its parents, if any."""
         self.statsd.constant_tags["incremental_enabled"] = self.incremental
         self.statsd.constant_tags["has_parent_origins"] = bool(self.parent_origins)
+
+        # May be set to True later
+        self.statsd.constant_tags["has_parent_snapshot"] = False
+
         if self.incremental:
             prev_snapshot = self.get_full_snapshot(self.origin.url)
+            self.statsd.constant_tags["has_previous_snapshot"] = bool(prev_snapshot)
             if prev_snapshot:
-                incremental_snapshot_origin = "self"
+                self.prev_snapshot = prev_snapshot
+                self.base_snapshots.append(prev_snapshot)
-            elif self.parent_origins is not None:
+            if self.parent_origins is not None:
                 # If this is the first time we load this origin and it is a forge
                 # fork, load incrementally from one of the origins it was forked from,
                 # closest parent first
                 for parent_origin in self.parent_origins:
-                    prev_snapshot = self.get_full_snapshot(parent_origin.url)
-                    if prev_snapshot is not None:
-                        incremental_snapshot_origin = "parent"
-                        break
-                else:
-                    incremental_snapshot_origin = "none"
-            else:
-                incremental_snapshot_origin = "none"
-
-            self.statsd.constant_tags[
-                "incremental_snapshot_origin"
-            ] = incremental_snapshot_origin
+                    parent_snapshot = self.get_full_snapshot(parent_origin.url)
+                    if parent_snapshot is not None:
+                        self.statsd.constant_tags["has_parent_snapshot"] = True
+                        self.base_snapshots.append(parent_snapshot)
         # Increments a metric with full name 'swh_loader_git'; which is useful to
         # count how many runs of the loader are with each incremental mode
         self.statsd.increment("git_total", tags={})
-        if prev_snapshot is not None:
-            self.base_snapshot = prev_snapshot
-        else:
-            self.base_snapshot = Snapshot(branches={})
-
     def fetch_data(self) -> bool:
         assert self.origin is not None
         base_repo = self.repo_representation(
             storage=self.storage,
-            base_snapshot=self.base_snapshot,
+            base_snapshots=self.base_snapshots,
             incremental=self.incremental,
             statsd=self.statsd,
         )
@@ -502,7 +499,7 @@
         unknown_objects = {}
         base_snapshot_reverse_branches = {
             branch.target: branch
-            for branch in self.base_snapshot.branches.values()
+            for branch in self.prev_snapshot.branches.values()
             if branch and branch.target_type != TargetType.ALIAS
         }
@@ -538,8 +535,8 @@
         the one we retrieved at the beginning of the run"""
         eventful = False
-        if self.base_snapshot and self.snapshot:
-            eventful = self.snapshot.id != self.base_snapshot.id
+        if self.prev_snapshot and self.snapshot:
+            eventful = self.snapshot.id != self.prev_snapshot.id
         elif self.snapshot:
             eventful = bool(self.snapshot.branches)
diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py
--- a/swh/loader/git/tests/test_loader.py
+++ b/swh/loader/git/tests/test_loader.py
@@ -128,7 +128,8 @@
         assert self.loader.statsd.constant_tags == {
             "visit_type": "git",
             "incremental_enabled": True,
-            "incremental_snapshot_origin": "none",
+            "has_parent_snapshot": False,
+            "has_previous_snapshot": False,
             "has_parent_origins": False,
         }
@@ -212,7 +213,8 @@
         assert self.loader.statsd.constant_tags == {
             "visit_type": "git",
             "incremental_enabled": True,
-            "incremental_snapshot_origin": "none",
+            "has_parent_snapshot": False,
+            "has_previous_snapshot": False,
             "has_parent_origins": True,
         }
@@ -284,7 +286,8 @@
         assert self.loader.statsd.constant_tags == {
             "visit_type": "git",
             "incremental_enabled": True,
-            "incremental_snapshot_origin": "parent",
+            "has_parent_snapshot": True,
+            "has_previous_snapshot": False,
             "has_parent_origins": True,
         }
@@ -323,7 +326,14 @@
                 allowed_statuses=None,
                 require_snapshot=True,
             ),
-            # -> does not need to fall back to the parent
+            # also fetches the parent, in case the origin was rebased on the parent
+            # since the last visit
+            call(
+                f"base://{self.repo_url}",
+                type=None,
+                allowed_statuses=None,
+                require_snapshot=True,
+            ),
         ]
         # TODO: assert "incremental*" is added to constant tags before these
@@ -336,7 +346,8 @@
         assert self.loader.statsd.constant_tags == {
             "visit_type": "git",
             "incremental_enabled": True,
-            "incremental_snapshot_origin": "self",
+            "has_parent_snapshot": False,  # Because we reset the mock since last time
+            "has_previous_snapshot": True,
             "has_parent_origins": True,
         }
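
Note on the change: with this diff, RepoRepresentation.determine_wants() seeds its set of locally known heads from every base snapshot it receives (the origin's own previous snapshot plus the latest snapshots of its parent origins) instead of a single base snapshot, while self.prev_snapshot (the origin's own last snapshot, or an empty one) is kept separately for the eventfulness comparison. The sketch below is a minimal, self-contained illustration of that heads-collection loop; the Branch and Snapshot dataclasses and the collect_local_heads helper are hypothetical stand-ins for illustration, not the swh.model types or the loader's actual API.

from dataclasses import dataclass, field
from typing import Dict, List, Optional, Set


@dataclass
class Branch:
    """Illustrative stand-in for swh.model's snapshot branch type."""
    target: bytes      # object id the branch points to
    target_type: str   # e.g. "revision" or "alias"


@dataclass
class Snapshot:
    """Illustrative stand-in for swh.model.model.Snapshot."""
    branches: Dict[bytes, Optional[Branch]] = field(default_factory=dict)


def collect_local_heads(base_snapshots: List[Snapshot]) -> Set[bytes]:
    """Hypothetical helper mirroring the new loop in determine_wants():
    gather hex-encoded heads from *all* base snapshots, skipping alias
    and dangling branches."""
    local_heads: Set[bytes] = set()
    for base_snapshot in base_snapshots:
        for branch in base_snapshot.branches.values():
            if not branch or branch.target_type == "alias":
                continue
            local_heads.add(branch.target.hex().encode())
    return local_heads


# Heads known through the origin's previous snapshot and through a parent
# origin's snapshot are merged, so objects reachable from either snapshot
# are not requested again from the remote.
prev = Snapshot({b"refs/heads/main": Branch(bytes(20), "revision")})
parent = Snapshot({b"refs/heads/main": Branch(bytes.fromhex("ff" * 20), "revision")})
assert len(collect_local_heads([prev, parent])) == 2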