diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py --- a/swh/loader/git/loader.py +++ b/swh/loader/git/loader.py @@ -38,6 +38,8 @@ logger = logging.getLogger(__name__) +HexBytes = bytes + class RepoRepresentation: """Repository representation for a Software Heritage origin.""" @@ -53,7 +55,7 @@ else: self.base_snapshot = Snapshot(branches={}) - self.heads: Set[bytes] = set() + self.heads: Set[HexBytes] = set() def get_parents(self, commit: bytes) -> List[bytes]: """This method should return the list of known parents""" @@ -62,7 +64,7 @@ def graph_walker(self) -> ObjectStoreGraphWalker: return ObjectStoreGraphWalker(self.heads, self.get_parents) - def determine_wants(self, refs: Dict[bytes, bytes]) -> List[bytes]: + def determine_wants(self, refs: Dict[bytes, HexBytes]) -> List[HexBytes]: """Get the list of bytehex sha1s that the git loader should fetch. This compares the remote refs sent by the server with the base snapshot @@ -73,7 +75,7 @@ return [] # Cache existing heads - local_heads: Set[bytes] = set() + local_heads: Set[HexBytes] = set() for branch_name, branch in self.base_snapshot.branches.items(): if not branch or branch.target_type == TargetType.ALIAS: continue @@ -82,7 +84,7 @@ self.heads = local_heads # Get the remote heads that we want to fetch - remote_heads: Set[bytes] = set() + remote_heads: Set[HexBytes] = set() for ref_name, ref_target in refs.items(): if utils.ignore_branch_name(ref_name): continue @@ -93,8 +95,8 @@ @dataclass class FetchPackReturn: - remote_refs: Dict[bytes, bytes] - symbolic_refs: Dict[bytes, bytes] + remote_refs: Dict[bytes, HexBytes] + symbolic_refs: Dict[bytes, HexBytes] pack_buffer: SpooledTemporaryFile pack_size: int @@ -136,8 +138,8 @@ self.pack_size_bytes = pack_size_bytes self.temp_file_cutoff = temp_file_cutoff # state initialized in fetch_data - self.remote_refs: Dict[bytes, bytes] = {} - self.symbolic_refs: Dict[bytes, bytes] = {} + self.remote_refs: Dict[bytes, HexBytes] = {} + self.symbolic_refs: Dict[bytes, HexBytes] = {} self.ref_object_types: Dict[bytes, Optional[TargetType]] = {} def fetch_pack_from_origin( @@ -274,7 +276,9 @@ self.symbolic_refs = fetch_info.symbolic_refs self.log.info( - "Listed %d refs for repo %s" % (len(self.remote_refs), self.origin.url), + "Listed %d refs for repo %s", + len(self.remote_refs), + self.origin.url, extra={ "swh_type": "git_repo_list_refs", "swh_repo": self.origin.url,