Differential D5815 Diff 20760 swh/loader/mercurial/hgutil.py

Changeset View

Standalone View

swh/loader/mercurial/hgutil.py

	# Copyright (C) 2020-2021 The Software Heritage developers			# Copyright (C) 2020-2021 The Software Heritage developers
	# See the AUTHORS file at the top-level directory of this distribution			# See the AUTHORS file at the top-level directory of this distribution
	# License: GNU General Public License version 3, or any later version			# License: GNU General Public License version 3, or any later version
	# See top-level LICENSE file for more information			# See top-level LICENSE file for more information
				from collections import defaultdict
	import io			import io
				import itertools
	import os			import os
	import signal			import signal
	import time			import time
	import traceback			import traceback
	from typing import Dict, NewType			from typing import List, Mapping, NewType, Set, Tuple

	from billiard import Process, Queue			from billiard import Process, Queue

	# The internal Mercurial API is not guaranteed to be stable.			# The internal Mercurial API is not guaranteed to be stable.
	from mercurial import context, error, hg, smartset, util # type: ignore			from mercurial import bookmarks, context, error, hg, smartset, util # type: ignore
	import mercurial.ui # type: ignore			import mercurial.ui # type: ignore

	# Short aliases for Mercurial internals, so the rest of the loader does not
	# have to spell out (unstable) internal Mercurial module paths.

	# NOTE(review): `mercurial.node` is reached through attribute access although
	# only `mercurial.ui` is imported as a submodule in the visible imports; this
	# presumably relies on Mercurial loading `mercurial.node` itself - confirm.
	NULLID = mercurial.node.nullid
	# Distinct static type for node ids; at runtime the values are plain `bytes`.
	HgNodeId = NewType("HgNodeId", bytes)
	# Type of the repository object used in this module's annotations.
	Repository = hg.localrepo
	# Alias of Mercurial's `context.basectx` class.
	BaseContext = context.basectx
	# Alias of Mercurial's `util.lrucachedict` class.
	LRUCacheDict = util.lrucachedict
	# Aliases of the smartset classes (`_spanset` is a private Mercurial name).
	HgSpanSet = smartset._spanset
	HgFilteredSet = smartset.filteredset
	# NOTE: this rebinds the name `LookupError` in this module to Mercurial's
	# `error.LookupError`, shadowing the builtin of the same name here.
	LookupError = error.LookupError


	def repository(path: str) -> hg.localrepo:
	    """Open the Mercurial repository found at ``path``.

	    A Mercurial ``ui`` object is obtained through ``mercurial.ui.ui.load()``
	    and handed to ``hg.repository`` together with the path encoded to bytes
	    (``str.encode()`` default encoding).

	    Args:
	        path: location of the repository, as text.

	    Returns:
	        Whatever ``hg.repository`` returns for that location.
	    """
	    hg_ui = mercurial.ui.ui.load()
	    encoded_path = path.encode()
	    return hg.repository(hg_ui, encoded_path)


	# Branch/bookmark name -> several node ids (e.g. all heads of a branch).
	MultipleMapping = Mapping[bytes, List[HgNodeId]]
	# Branch/bookmark name -> a single node id.
	SingleMapping = Mapping[bytes, HgNodeId]


	def branches_info(
	    repo: hg.localrepo, blacklist: Set[int]
	) -> Tuple[SingleMapping, MultipleMapping, MultipleMapping, SingleMapping]:
	    """List all relevant information about branch heads and bookmarks, by type.

	    Branch tip: first open, non-blacklisted head of the branch when its heads
	        are ordered by node id
	    Branch open heads: all open heads of the given branch
	    Branch closed heads: all closed heads of the given branch
	    Bookmarks: all bookmarks in the repository (except local divergent ones)

	    Categories may have overlapping nodes: a branch tip is also one of the open
	    heads of its branch and may have a bookmark on it, for example.

	    Args:
	        repo: the Mercurial repository to inspect.
	        blacklist: revision numbers to ignore; heads whose revision number is
	            in this set are left out of every branch category.

	    Returns:
	        A ``(branch_tips, branch_open_heads, branch_closed_heads, bookmarks)``
	        tuple. ``branch_tips`` always has a ``b"default"`` entry. When every
	        branch that has open heads has exactly one, ``branch_open_heads`` is
	        returned empty because it would only repeat ``branch_tips``.
	    """
	    branch_tips = {}
	    branch_open_heads = defaultdict(list)
	    branch_closed_heads = defaultdict(list)
	    all_bookmarks = bookmarks.listbookmarks(repo)

	    for branch_name, heads in repo.branchmap().items():
	        # Sort the heads by node id since it's stable and doesn't depend on local
	        # topology like cloning order.
	        for head in sorted(heads):
	            ctx = repo[head]
	            if ctx.rev() in blacklist:
	                # This revision or one of its ancestors is corrupted, ignore it
	                continue
	            node_id = ctx.node()
	            if ctx.closesbranch():
	                branch_closed_heads[branch_name].append(node_id)
	            else:
	                # The first usable open head becomes the tip. Looking only at
	                # the very first head would drop the tip (and, through the
	                # simplification below, the head itself) whenever that one
	                # head happens to be closed or blacklisted.
	                branch_tips.setdefault(branch_name, node_id)
	                branch_open_heads[branch_name].append(node_id)

	    if branch_tips.get(b"default") is None:
	        # `default` has no usable open head, take the first revision that works
	        # since this is a pretty broken repo. First try with the ones we've just
	        # collected since they might be good candidates for `HEAD`.
	        candidates = itertools.chain(
	            branch_tips.values(),
	            itertools.chain.from_iterable(branch_open_heads.values()),
	            itertools.chain.from_iterable(branch_closed_heads.values()),
	        )
	        first_working_revision = next(candidates, None)
	        if first_working_revision is None:
	            # No noteworthy revisions could be found, so take the first one.
	            # If your first revision is broken, the repo is useless, so it's
	            # okay to crash the loader since that would realistically never
	            # happen. But we've gated this function to only be called when at
	            # least one revision was loaded, so this will always be fine.
	            first_working_revision = repo[0].node()
	        branch_tips[b"default"] = first_working_revision

	    if all(len(h) == 1 for h in branch_open_heads.values()):
	        # The most common case is one head per branch. Simplifying this means we
	        # have less duplicate data, because open heads are the same as open
	        # branch tips. We don't do more complex deduplication, this is just a
	        # simple optimization.
	        branch_open_heads.clear()

	    return branch_tips, branch_open_heads, branch_closed_heads, all_bookmarks


	class CloneTimeout(Exception):
	    """Clone timed out (presumably; the raising code is outside this excerpt)."""


	class CloneFailure(Exception):
	    """Clone failed (presumably; the raising code is outside this excerpt)."""
	▲ Show 20 Lines • Show All 55 Lines • Show Last 20 Lines