Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/git/loader.py
Show All 29 Lines | from swh.model.model import ( | ||||
Sha1Git, | Sha1Git, | ||||
) | ) | ||||
from swh.loader.core.loader import DVCSLoader | from swh.loader.core.loader import DVCSLoader | ||||
from swh.storage.algos.snapshot import snapshot_get_all_branches | from swh.storage.algos.snapshot import snapshot_get_all_branches | ||||
from . import converters, utils | from . import converters, utils | ||||
def ignore_branch_name(branch_name: bytes) -> bool:
    """Tell whether the git loader must skip the ref called `branch_name`.

    A ref is skipped when its name ends with ``^{}`` (peeled ref), or when it
    starts with ``refs/pull/`` and ends with ``/merge`` (auto-merged GitHub
    pull request).
    """
    # Peeled refs make the git protocol explode
    is_peeled_ref = branch_name.endswith(b"^{}")
    # Auto-merged GitHub pull requests are filtered out
    is_pull_request_merge = branch_name.startswith(
        b"refs/pull/"
    ) and branch_name.endswith(b"/merge")
    return is_peeled_ref or is_pull_request_merge
def filter_refs(refs: Dict[bytes, bytes]) -> Dict[bytes, bytes]:
    """Build a new refs dictionary without the names that `ignore_branch_name` rejects.

    The input mapping is left untouched; kept entries preserve their order.
    """
    kept_refs: Dict[bytes, bytes] = {}
    for ref_name, ref_target in refs.items():
        if ignore_branch_name(ref_name):
            continue
        kept_refs[ref_name] = ref_target
    return kept_refs
class RepoRepresentation: | class RepoRepresentation: | ||||
"""Repository representation for a Software Heritage origin.""" | """Repository representation for a Software Heritage origin.""" | ||||
def __init__( | def __init__( | ||||
self, storage, base_snapshot: Optional[Snapshot] = None, ignore_history=False | self, storage, base_snapshot: Optional[Snapshot] = None, ignore_history=False | ||||
): | ): | ||||
self.storage = storage | self.storage = storage | ||||
self.ignore_history = ignore_history | self.ignore_history = ignore_history | ||||
Show All 29 Lines | def determine_wants(self, refs: Dict[bytes, bytes]) -> List[bytes]: | ||||
continue | continue | ||||
local_heads.add(hashutil.hash_to_hex(branch.target).encode()) | local_heads.add(hashutil.hash_to_hex(branch.target).encode()) | ||||
self.heads = local_heads | self.heads = local_heads | ||||
# Get the remote heads that we want to fetch | # Get the remote heads that we want to fetch | ||||
remote_heads: Set[bytes] = set() | remote_heads: Set[bytes] = set() | ||||
for ref_name, ref_target in refs.items(): | for ref_name, ref_target in refs.items(): | ||||
if ignore_branch_name(ref_name): | if utils.ignore_branch_name(ref_name): | ||||
continue | continue | ||||
remote_heads.add(ref_target) | remote_heads.add(ref_target) | ||||
return list(remote_heads - local_heads) | return list(remote_heads - local_heads) | ||||
@dataclass | @dataclass | ||||
class FetchPackReturn: | class FetchPackReturn: | ||||
▲ Show 20 Lines • Show All 87 Lines • ▼ Show 20 Lines | ) -> FetchPackReturn: | ||||
remote_refs = pack_result.refs or {} | remote_refs = pack_result.refs or {} | ||||
symbolic_refs = pack_result.symrefs or {} | symbolic_refs = pack_result.symrefs or {} | ||||
pack_buffer.flush() | pack_buffer.flush() | ||||
pack_size = pack_buffer.tell() | pack_size = pack_buffer.tell() | ||||
pack_buffer.seek(0) | pack_buffer.seek(0) | ||||
return FetchPackReturn( | return FetchPackReturn( | ||||
remote_refs=filter_refs(remote_refs), | remote_refs=utils.filter_refs(remote_refs), | ||||
symbolic_refs=filter_refs(symbolic_refs), | symbolic_refs=utils.filter_refs(symbolic_refs), | ||||
pack_buffer=pack_buffer, | pack_buffer=pack_buffer, | ||||
pack_size=pack_size, | pack_size=pack_size, | ||||
) | ) | ||||
def list_pack( | def list_pack( | ||||
self, pack_data, pack_size | self, pack_data, pack_size | ||||
) -> Tuple[Dict[bytes, bytes], Dict[bytes, Set[bytes]]]: | ) -> Tuple[Dict[bytes, bytes], Dict[bytes, Set[bytes]]]: | ||||
id_to_type = {} | id_to_type = {} | ||||
▲ Show 20 Lines • Show All 336 Lines • Show Last 20 Lines |