Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/git/loader.py
Show All 26 Lines | from swh.model.model import ( | ||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | SnapshotBranch, | ||||
TargetType, | TargetType, | ||||
Sha1Git, | Sha1Git, | ||||
) | ) | ||||
from swh.loader.core.loader import DVCSLoader | from swh.loader.core.loader import DVCSLoader | ||||
from swh.storage.algos.snapshot import snapshot_get_all_branches | from swh.storage.algos.snapshot import snapshot_get_all_branches | ||||
from . import converters | from . import converters, utils | ||||
def ignore_branch_name(branch_name: bytes) -> bool: | def ignore_branch_name(branch_name: bytes) -> bool: | ||||
"""Should the git loader ignore the branch named `branch_name`?""" | """Should the git loader ignore the branch named `branch_name`?""" | ||||
if branch_name.endswith(b"^{}"): | if branch_name.endswith(b"^{}"): | ||||
# Peeled refs make the git protocol explode | # Peeled refs make the git protocol explode | ||||
return True | return True | ||||
elif branch_name.startswith(b"refs/pull/") and branch_name.endswith(b"/merge"): | elif branch_name.startswith(b"refs/pull/") and branch_name.endswith(b"/merge"): | ||||
▲ Show 20 Lines • Show All 407 Lines • ▼ Show 20 Lines | def get_snapshot(self) -> Snapshot: | ||||
target_type=converters.DULWICH_TARGET_TYPES[object_type], | target_type=converters.DULWICH_TARGET_TYPES[object_type], | ||||
) | ) | ||||
else: | else: | ||||
# The object pointed at by this ref was not fetched, supposedly | # The object pointed at by this ref was not fetched, supposedly | ||||
# because it existed in the base snapshot. We record it here, | # because it existed in the base snapshot. We record it here, | ||||
# and we can get it from the base snapshot later. | # and we can get it from the base snapshot later. | ||||
unfetched_refs[ref_name] = target | unfetched_refs[ref_name] = target | ||||
dangling_branches = {} | |||||
# Handle symbolic references as alias branches | # Handle symbolic references as alias branches | ||||
for ref_name, target in self.symbolic_refs.items(): | for ref_name, target in self.symbolic_refs.items(): | ||||
branches[ref_name] = SnapshotBranch( | branches[ref_name] = SnapshotBranch( | ||||
target_type=TargetType.ALIAS, target=target, | target_type=TargetType.ALIAS, target=target, | ||||
) | ) | ||||
if target not in branches and target not in unfetched_refs: | |||||
# This handles the case where the pointer is "dangling". | |||||
# There's a chance that a further symbolic reference | |||||
# overrides this default value, which is totally fine. | |||||
dangling_branches[target] = ref_name | |||||
branches[target] = None | |||||
if unfetched_refs: | if unfetched_refs: | ||||
# Handle inference of object types from the contents of the | # Handle inference of object types from the contents of the | ||||
# previous snapshot | # previous snapshot | ||||
unknown_objects = {} | unknown_objects = {} | ||||
base_snapshot_reverse_branches = { | base_snapshot_reverse_branches = { | ||||
branch.target: branch | branch.target: branch | ||||
for branch in self.base_snapshot.branches.values() | for branch in self.base_snapshot.branches.values() | ||||
Show All 15 Lines | def get_snapshot(self) -> Snapshot: | ||||
% ( | % ( | ||||
", ".join( | ", ".join( | ||||
f"{name.decode()}: {hashutil.hash_to_hex(obj)}" | f"{name.decode()}: {hashutil.hash_to_hex(obj)}" | ||||
for name, obj in unknown_objects.items() | for name, obj in unknown_objects.items() | ||||
) | ) | ||||
) | ) | ||||
) | ) | ||||
utils.warn_dangling_branches( | |||||
branches, dangling_branches, self.log, self.origin_url | |||||
) | |||||
self.snapshot = Snapshot(branches=branches) | self.snapshot = Snapshot(branches=branches) | ||||
return self.snapshot | return self.snapshot | ||||
def get_fetch_history_result(self) -> Dict[str, int]: | def get_fetch_history_result(self) -> Dict[str, int]: | ||||
return { | return { | ||||
"contents": len(self.type_to_ids[b"blob"]), | "contents": len(self.type_to_ids[b"blob"]), | ||||
"directories": len(self.type_to_ids[b"tree"]), | "directories": len(self.type_to_ids[b"tree"]), | ||||
"revisions": len(self.type_to_ids[b"commit"]), | "revisions": len(self.type_to_ids[b"commit"]), | ||||
Show All 38 Lines |