Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/hgutil.py
# Copyright (C) 2020-2021 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import defaultdict | |||||
import io | import io | ||||
import itertools | |||||
import os | import os | ||||
import signal | import signal | ||||
import time | import time | ||||
import traceback | import traceback | ||||
from typing import Dict, NewType | from typing import List, Mapping, NewType, Set, Tuple | ||||
from billiard import Process, Queue | from billiard import Process, Queue | ||||
# The internal Mercurial API is not guaranteed to be stable. | # The internal Mercurial API is not guaranteed to be stable. | ||||
from mercurial import context, error, hg, smartset, util # type: ignore | from mercurial import bookmarks, context, error, hg, smartset, util # type: ignore | ||||
import mercurial.ui # type: ignore | import mercurial.ui # type: ignore | ||||
# Short local aliases for the (unstable) internal Mercurial API names used by
# this module.
NULLID = mercurial.node.nullid

# Distinct static type for Mercurial node ids, which are ``bytes`` at runtime.
HgNodeId = NewType("HgNodeId", bytes)

Repository = hg.localrepo
BaseContext = context.basectx
LRUCacheDict = util.lrucachedict
HgSpanSet = smartset._spanset
HgFilteredSet = smartset.filteredset
# NOTE: within this module this name refers to Mercurial's lookup error class.
LookupError = error.LookupError
def repository(path: str) -> hg.localrepo:
    """Open the Mercurial repository located at ``path``.

    A ``ui`` object is obtained from ``mercurial.ui.ui.load()`` and the path is
    encoded to bytes before being handed to ``hg.repository``.
    """
    return hg.repository(mercurial.ui.ui.load(), path.encode())
MultipleMapping = Mapping[bytes, List[HgNodeId]]
SingleMapping = Mapping[bytes, HgNodeId]


def branches_info(
    repo: hg.localrepo, blacklist: Set[int]
) -> Tuple[SingleMapping, MultipleMapping, MultipleMapping, SingleMapping]:
    """List branch heads and bookmarks of ``repo``, grouped by type.

    Args:
        repo: the repository to inspect.
        blacklist: revision numbers to ignore (the revision itself or one of
            its ancestors is corrupted). Heads whose ``rev()`` is in this set
            are skipped.

    Returns:
        A ``(branch_tips, branch_open_heads, branch_closed_heads, bookmarks)``
        tuple:

        - branch tips: for each branch, its first usable open head, heads
          being ordered by node id;
        - branch open heads: all usable open heads of each branch. Left empty
          when every branch has exactly one open head, since that head is then
          the branch tip;
        - branch closed heads: all usable closed heads of each branch;
        - bookmarks: all bookmarks in the repository (except local divergent
          ones).

        Categories may overlap: the same node can be a branch head and carry
        a bookmark, for example. ``b"default"`` always has a tip, see the
        fallback below.
    """
    branch_tips = {}
    branch_open_heads = defaultdict(list)
    branch_closed_heads = defaultdict(list)
    # NOTE(review): `listbookmarks` appears to return hex-encoded node ids,
    # whereas the heads below are binary node ids -- confirm what callers
    # expect before relying on the `HgNodeId` annotation for bookmarks.
    all_bookmarks = bookmarks.listbookmarks(repo)

    for branch_name, heads in repo.branchmap().items():
        # Sort the heads by node id since it's stable and doesn't depend on
        # local topology like cloning order.
        for head in sorted(heads):
            ctx = repo[head]
            if ctx.rev() in blacklist:
                # This revision or one of its ancestors is corrupted, ignore it
                continue
            node_id = ctx.node()
            if ctx.closesbranch():
                branch_closed_heads[branch_name].append(node_id)
                continue
            # The first usable open head (in node id order) is the branch tip.
            # Looking only at the very first head would leave the branch
            # without a tip whenever that head is closed or blacklisted, even
            # though other open heads exist.
            branch_tips.setdefault(branch_name, node_id)
            branch_open_heads[branch_name].append(node_id)

    if branch_tips.get(b"default") is None:
        # `default` has no usable tip, take the first revision that works
        # since this is a pretty broken repo. First try with the ones we've
        # just collected since they might be good candidates for `HEAD`.
        candidates = itertools.chain(
            branch_tips.values(),
            itertools.chain.from_iterable(branch_open_heads.values()),
            itertools.chain.from_iterable(branch_closed_heads.values()),
        )
        fallback = next(candidates, None)
        if fallback is None:
            # No noteworthy revisions could be found, so take the first one.
            # If your first revision is broken, the repo is useless, so it's
            # okay to crash the loader since that would realistically never
            # happen. This function is only meant to be called when at least
            # one revision was loaded, so this lookup is expected to succeed.
            fallback = repo[0].node()
        branch_tips[b"default"] = fallback

    if all(len(open_heads) == 1 for open_heads in branch_open_heads.values()):
        # The most common case is one head per branch. Simplifying this means
        # we have less duplicate data, because open heads are the same as open
        # branch tips. We don't do more complex deduplication, this is just a
        # simple optimization.
        branch_open_heads.clear()

    return branch_tips, branch_open_heads, branch_closed_heads, all_bookmarks
class CloneTimeout(Exception):
    """Error type for a clone that timed out.

    NOTE(review): the raise site is outside the visible part of this file;
    the meaning is inferred from the class name -- confirm.
    """
class CloneFailure(Exception):
    """Error type for a clone that failed.

    NOTE(review): the raise site is outside the visible part of this file;
    the meaning is inferred from the class name -- confirm.
    """
▲ Show 20 Lines • Show All 55 Lines • Show Last 20 Lines |