Changeset View
Standalone View
swh/loader/mercurial/hgutil.py
# Copyright (C) 2020-2021 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||||||||||||||||||||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||||||||||||||||||||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||||||||||||||||||||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||||||||||||||||||||||
from collections import defaultdict | |||||||||||||||||||||||||
import io | import io | ||||||||||||||||||||||||
import itertools | |||||||||||||||||||||||||
import os | import os | ||||||||||||||||||||||||
import signal | import signal | ||||||||||||||||||||||||
import time | import time | ||||||||||||||||||||||||
import traceback | import traceback | ||||||||||||||||||||||||
from typing import Dict, NewType | from typing import List, Mapping, NewType, Set, Tuple | ||||||||||||||||||||||||
from billiard import Process, Queue | from billiard import Process, Queue | ||||||||||||||||||||||||
# The internal Mercurial API is not guaranteed to be stable. | # The internal Mercurial API is not guaranteed to be stable. | ||||||||||||||||||||||||
from mercurial import context, error, hg, smartset, util # type: ignore | from mercurial import bookmarks, context, error, hg, smartset, util # type: ignore | ||||||||||||||||||||||||
import mercurial.ui # type: ignore | import mercurial.ui # type: ignore | ||||||||||||||||||||||||
NULLID = mercurial.node.nullid | NULLID = mercurial.node.nullid | ||||||||||||||||||||||||
HgNodeId = NewType("HgNodeId", bytes) | HgNodeId = NewType("HgNodeId", bytes) | ||||||||||||||||||||||||
Repository = hg.localrepo | Repository = hg.localrepo | ||||||||||||||||||||||||
BaseContext = context.basectx | BaseContext = context.basectx | ||||||||||||||||||||||||
LRUCacheDict = util.lrucachedict | LRUCacheDict = util.lrucachedict | ||||||||||||||||||||||||
HgSpanSet = smartset._spanset | HgSpanSet = smartset._spanset | ||||||||||||||||||||||||
HgFilteredSet = smartset.filteredset | HgFilteredSet = smartset.filteredset | ||||||||||||||||||||||||
LookupError = error.LookupError | LookupError = error.LookupError | ||||||||||||||||||||||||
def repository(path: str) -> hg.localrepo: | def repository(path: str) -> hg.localrepo: | ||||||||||||||||||||||||
ui = mercurial.ui.ui.load() | ui = mercurial.ui.ui.load() | ||||||||||||||||||||||||
return hg.repository(ui, path.encode()) | return hg.repository(ui, path.encode()) | ||||||||||||||||||||||||
def branches(repo: hg.localrepo) -> Dict[bytes, HgNodeId]: | MultipleMapping = Mapping[bytes, List[HgNodeId]] | ||||||||||||||||||||||||
"""List repository named branches and their tip node.""" | SingleMapping = Mapping[bytes, HgNodeId] | ||||||||||||||||||||||||
result = {} | |||||||||||||||||||||||||
for tag, heads, tip, isclosed in repo.branchmap().iterbranches(): | |||||||||||||||||||||||||
result[tag] = tip | def branches_info( | ||||||||||||||||||||||||
olasd: same here, `s/blacklist/ignored/g` | |||||||||||||||||||||||||
return result | repo: hg.localrepo, blacklist: Set[int] | ||||||||||||||||||||||||
Done Inline ActionsThis is probably worth using a dataclass, so we know which tuple elements mean what directly. olasd: This is probably worth using a `dataclass`, so we know which tuple elements mean what directly. | |||||||||||||||||||||||||
) -> (Tuple[SingleMapping, MultipleMapping, MultipleMapping, SingleMapping]): | |||||||||||||||||||||||||
"""Lists all relevant information about branch heads and bookmarks, grouped by type. | |||||||||||||||||||||||||
Branch tip: tip-most open head | |||||||||||||||||||||||||
Branch open heads: all open heads of the given branch | |||||||||||||||||||||||||
Branch closed heads: all closed heads of the given branch | |||||||||||||||||||||||||
Bookmarks: all bookmarks in the repository (except local divergent ones) | |||||||||||||||||||||||||
Categories may have overlapping nodes: a branch tip can be a closed branch head | |||||||||||||||||||||||||
Done Inline ActionsThe docstring is missing a description of what the ignored list does. olasd: The docstring is missing a description of what the ignored list does. | |||||||||||||||||||||||||
and have a bookmark on it, for example. | |||||||||||||||||||||||||
""" | |||||||||||||||||||||||||
branch_tips = {} | |||||||||||||||||||||||||
branch_open_heads = defaultdict(list) | |||||||||||||||||||||||||
branch_closed_heads = defaultdict(list) | |||||||||||||||||||||||||
all_bookmarks = bookmarks.listbookmarks(repo) | |||||||||||||||||||||||||
for branch_name, heads in repo.branchmap().items(): | |||||||||||||||||||||||||
# Sort the heads by node id since it's stable and doesn't depend on local | |||||||||||||||||||||||||
# topology like cloning order. | |||||||||||||||||||||||||
tip = sorted(heads)[0] | |||||||||||||||||||||||||
ctx = repo[tip] | |||||||||||||||||||||||||
if ctx.rev() not in blacklist and not ctx.closesbranch(): | |||||||||||||||||||||||||
branch_tips[branch_name] = ctx.node() | |||||||||||||||||||||||||
for head in heads: | |||||||||||||||||||||||||
head = repo[head] | |||||||||||||||||||||||||
if head.rev() in blacklist: | |||||||||||||||||||||||||
# This revision or one of its ancestors is corrupted, ignore it | |||||||||||||||||||||||||
continue | |||||||||||||||||||||||||
node_id = head.node() | |||||||||||||||||||||||||
if head.closesbranch(): | |||||||||||||||||||||||||
branch_closed_heads[branch_name].append(node_id) | |||||||||||||||||||||||||
else: | |||||||||||||||||||||||||
branch_open_heads[branch_name].append(node_id) | |||||||||||||||||||||||||
if branch_tips.get(b"default") is None: | |||||||||||||||||||||||||
# `default`'s tip is corrupted, take the first revision that works since this is | |||||||||||||||||||||||||
Not Done Inline Actions
I would suggest moving the assignment of the "branch tip" inside the heads loop, rather than only considering the "first" head. I'm also a bit surprised that mercurial doesn't provide a consistent sorting of the heads. How would it pick which head the next commit goes on? olasd: I would suggest moving the assignment of the "branch tip" inside the heads loop, rather than… | |||||||||||||||||||||||||
Done Inline ActionsThis is probably simpler yep.
Mercurial's sorting is consistent per-repo. If you re-clone a repository, the order of branches or heads isn't guaranteed, since there are multiple valid topologies of any given repo and one of them might be better suited to the current wire protocol, or be chosen arbitrarily. Alphare: This is probably simpler yep.
> I'm also a bit surprised that mercurial doesn't provide a… | |||||||||||||||||||||||||
Done Inline Actions
Sorry, I haven't answered that question, but I'm not sure I understand it, since it's going to apply it on top of whichever head you're on. Alphare: > How would it pick on which one one would do the next commit?
Sorry, I haven't answered that… | |||||||||||||||||||||||||
Not Done Inline ActionsAck. I'm still a bit surprised that the "tipmost head" of a branch depends on the order of operations rather than an actual property of the changeset, but if that's the way it is... olasd: Ack. I'm still a bit surprised that the "tipmost head" of a branch depends on the order of… | |||||||||||||||||||||||||
Done Inline ActionsYeah, tip is an approximation of "what is most recent is probably the thing you want", but is kind of hellish if you think about it too much. Mercurial discourages you from having multiple heads on a branch, and we (Octobus with Heptapod) are even stricter than this. Alphare: Yeah, `tip` is an approximation of "what is most recent is probably the thing you want", but is… | |||||||||||||||||||||||||
# a pretty broken repo. First try with the ones we've just collected since they | |||||||||||||||||||||||||
# might be good candidates for `HEAD`. | |||||||||||||||||||||||||
try: | |||||||||||||||||||||||||
first_working_revision = next( | |||||||||||||||||||||||||
itertools.chain( | |||||||||||||||||||||||||
branch_tips.values(), | |||||||||||||||||||||||||
Not Done Inline ActionsDo we really need to generate a fake "default" branch here? (or worse, erase an existing one) I'd suggest returning a "default rev pointer", in addition to the lists of branches, bookmarks, etc., which would either point to the @ bookmark, or to the tip of the default branch. olasd: Do we really need to generate a fake "default" branch here? (or worse, erase an existing one)… | |||||||||||||||||||||||||
Done Inline ActionsI should have thought about this :) Alphare: I should have thought about this :) | |||||||||||||||||||||||||
itertools.chain.from_iterable(branch_open_heads.values()), | |||||||||||||||||||||||||
itertools.chain.from_iterable(branch_closed_heads.values()), | |||||||||||||||||||||||||
branch_tips.values(), | |||||||||||||||||||||||||
) | |||||||||||||||||||||||||
) | |||||||||||||||||||||||||
branch_tips[b"default"] = first_working_revision | |||||||||||||||||||||||||
except StopIteration: | |||||||||||||||||||||||||
# No noteworthy revisions could be found, so take the first one. | |||||||||||||||||||||||||
# If your first revision is broken, the repo is useless, so it's okay to | |||||||||||||||||||||||||
# crash the loader since that would realistically never happen. But we've | |||||||||||||||||||||||||
# gated this function to only be called when at least one revision was | |||||||||||||||||||||||||
# loaded, so this will always be fine. | |||||||||||||||||||||||||
branch_tips[b"default"] = repo[0].node() | |||||||||||||||||||||||||
if all(len(h) == 1 for h in branch_open_heads.values()): | |||||||||||||||||||||||||
# The most common case is one head per branch. Simplifying this means we have | |||||||||||||||||||||||||
# less duplicate data, because open heads are the same as open branch tips. | |||||||||||||||||||||||||
# We don't do more complex deduplication, this is just a simple optimization. | |||||||||||||||||||||||||
branch_open_heads.clear() | |||||||||||||||||||||||||
return branch_tips, branch_open_heads, branch_closed_heads, all_bookmarks | |||||||||||||||||||||||||
Not Done Inline ActionsI think I would rather return a snapshot with no default (HEAD) branch than to pick one arbitrarily. What would mercurial itself do if I were to clone a repo with neither a default branch nor an @ bookmark? olasd: I think I would rather return a snapshot with no default (`HEAD`) branch than to pick one… | |||||||||||||||||||||||||
Done Inline Actions
If that's an option, I'm happy to oblige.
It takes the tip of the first branch it sees in the branchmap, looks like. $ hg init no-default $ cd no-default $ hg branch stable $ touch a $ hg commit -Adm "Initial" $ hg branches stable $ cd .. $ hg clone no-default no-default-clone updating to branch stable 1 files updated, 0 files merged, 0 files removed, 0 files unresolved Alphare: > I think I would rather return a snapshot with no default (HEAD) branch than to pick one… | |||||||||||||||||||||||||
Done Inline ActionsI found the documentation: Mercurial will update the working directory to the first applicable revision from this list: a) null if -U or the source repository has no changesets b) if -u . and the source repository is local, the first parent of the source repository's working directory c) the changeset specified with -u (if a branch name, this means the latest head of that branch) d) the changeset specified with -r e) the tipmost head specified with -b f) the tipmost head specified with the url#branch source syntax g) the revision marked with the '@' bookmark, if present h) the tipmost head of the default branch i) tip Alphare: I found the documentation:
Mercurial will update the working directory to the first… | |||||||||||||||||||||||||
Not Done Inline ActionsAnd, what you're saying is that the "tipmost head" for a given branch depends on the local order of operations on the repository? (sorry for being slow, I'm just trying to make sure we use the most faithful representation) olasd: And, what you're saying is that the "tipmost head" for a given branch depends on the local… | |||||||||||||||||||||||||
Done Inline ActionsThat is correct. Alphare: That is correct. | |||||||||||||||||||||||||
Not Done Inline ActionsThen I'm fine with you just implementing g), h) and i), and generating a snapshot with no HEAD if none of these yields a reference to a changeset. olasd: Then I'm fine with you just implementing g), h) and i), and generating a snapshot with no HEAD… | |||||||||||||||||||||||||
class CloneTimeout(Exception): | class CloneTimeout(Exception): | ||||||||||||||||||||||||
pass | pass | ||||||||||||||||||||||||
class CloneFailure(Exception): | class CloneFailure(Exception): | ||||||||||||||||||||||||
Not Done Inline ActionsShouldn't we remove all the members of branch_open_heads with a single value instead (this would avoid generating branch-heads/foo/0 for all branches when there's a single one with multiple open heads)? olasd: Shouldn't we remove all the members of `branch_open_heads` with a single value instead (this… | |||||||||||||||||||||||||
Done Inline ActionsYep, probably better Alphare: Yep, probably better | |||||||||||||||||||||||||
pass | pass | ||||||||||||||||||||||||
def _clone_task(src: str, dest: str, errors: Queue) -> None: | def _clone_task(src: str, dest: str, errors: Queue) -> None: | ||||||||||||||||||||||||
"""Clone task to run in a subprocess. | """Clone task to run in a subprocess. | ||||||||||||||||||||||||
Args: | Args: | ||||||||||||||||||||||||
Done Inline ActionsIf you apply my other change, these lists should already be sorted. olasd: If you apply my other change, these lists should already be sorted. | |||||||||||||||||||||||||
src: clone source | src: clone source | ||||||||||||||||||||||||
dest: clone destination | dest: clone destination | ||||||||||||||||||||||||
errors: message queue to communicate errors | errors: message queue to communicate errors | ||||||||||||||||||||||||
""" | """ | ||||||||||||||||||||||||
try: | try: | ||||||||||||||||||||||||
hg.clone( | hg.clone( | ||||||||||||||||||||||||
ui=mercurial.ui.ui.load(), | ui=mercurial.ui.ui.load(), | ||||||||||||||||||||||||
peeropts={}, | peeropts={}, | ||||||||||||||||||||||||
▲ Show 20 Lines • Show All 41 Lines • Show Last 20 Lines |
same here, s/blacklist/ignored/g