Changeset View
Standalone View
swh/loader/mercurial/hgutil.py
# Copyright (C) 2020-2021 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||||||||||||||||||||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||||||||||||||||||||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||||||||||||||||||||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||||||||||||||||||||||
from collections import defaultdict | |||||||||||||||||||||||||
from dataclasses import dataclass | |||||||||||||||||||||||||
import io | import io | ||||||||||||||||||||||||
import os | import os | ||||||||||||||||||||||||
import signal | import signal | ||||||||||||||||||||||||
import time | import time | ||||||||||||||||||||||||
import traceback | import traceback | ||||||||||||||||||||||||
from typing import Dict, NewType | from typing import Dict, List, Mapping, NewType, Optional, Set | ||||||||||||||||||||||||
from billiard import Process, Queue | from billiard import Process, Queue | ||||||||||||||||||||||||
# The internal Mercurial API is not guaranteed to be stable. | # The internal Mercurial API is not guaranteed to be stable. | ||||||||||||||||||||||||
from mercurial import context, error, hg, smartset, util # type: ignore | from mercurial import bookmarks, context, error, hg, smartset, util # type: ignore | ||||||||||||||||||||||||
import mercurial.ui # type: ignore | import mercurial.ui # type: ignore | ||||||||||||||||||||||||
NULLID = mercurial.node.nullid | NULLID = mercurial.node.nullid | ||||||||||||||||||||||||
HgNodeId = NewType("HgNodeId", bytes) | HgNodeId = NewType("HgNodeId", bytes) | ||||||||||||||||||||||||
Repository = hg.localrepo | Repository = hg.localrepo | ||||||||||||||||||||||||
BaseContext = context.basectx | BaseContext = context.basectx | ||||||||||||||||||||||||
LRUCacheDict = util.lrucachedict | LRUCacheDict = util.lrucachedict | ||||||||||||||||||||||||
HgSpanSet = smartset._spanset | HgSpanSet = smartset._spanset | ||||||||||||||||||||||||
HgFilteredSet = smartset.filteredset | HgFilteredSet = smartset.filteredset | ||||||||||||||||||||||||
LookupError = error.LookupError | LookupError = error.LookupError | ||||||||||||||||||||||||
def repository(path: str) -> hg.localrepo: | def repository(path: str) -> hg.localrepo: | ||||||||||||||||||||||||
ui = mercurial.ui.ui.load() | ui = mercurial.ui.ui.load() | ||||||||||||||||||||||||
return hg.repository(ui, path.encode()) | return hg.repository(ui, path.encode()) | ||||||||||||||||||||||||
def branches(repo: hg.localrepo) -> Dict[bytes, HgNodeId]: | @dataclass | ||||||||||||||||||||||||
"""List repository named branches and their tip node.""" | class BranchingInfo: | ||||||||||||||||||||||||
result = {} | tips: Mapping[bytes, HgNodeId] | ||||||||||||||||||||||||
for tag, heads, tip, isclosed in repo.branchmap().iterbranches(): | """The first head of the branch, sorted by nodeid if there are multiple heads.""" | ||||||||||||||||||||||||
result[tag] = tip | bookmarks: Mapping[bytes, HgNodeId] | ||||||||||||||||||||||||
olasd: same here, `s/blacklist/ignored/g` | |||||||||||||||||||||||||
return result | """all bookmarks in the repository (except local divergent ones)""" | ||||||||||||||||||||||||
Done Inline Actions — olasd: This is probably worth using a `dataclass`, so we know directly which tuple elements mean what.
open_heads: Mapping[bytes, List[HgNodeId]] | |||||||||||||||||||||||||
"""All *open* heads of a given branch, sorted by nodeid""" | |||||||||||||||||||||||||
closed_heads: Mapping[bytes, List[HgNodeId]] | |||||||||||||||||||||||||
"""All *closed* heads of a given branch, sorted by nodeid, if any""" | |||||||||||||||||||||||||
default_branch_alias: Optional[bytes] | |||||||||||||||||||||||||
"""The default snapshot branch to show in the UI""" | |||||||||||||||||||||||||
def branching_info(repo: hg.localrepo, ignored: Set[int]) -> BranchingInfo: | |||||||||||||||||||||||||
Done Inline Actions — olasd: The docstring is missing a description of what the `ignored` list does.
"""Lists all relevant information about branch heads and bookmarks, grouped by type. | |||||||||||||||||||||||||
`ignored`: Revisions that we ignore during loading because they are corrupted or | |||||||||||||||||||||||||
have a corrupted ancestor. | |||||||||||||||||||||||||
Categories may have overlapping nodes: a branch tip can be a closed branch head | |||||||||||||||||||||||||
and have a bookmark on it, for example. | |||||||||||||||||||||||||
""" | |||||||||||||||||||||||||
branch_tips: Dict[bytes, HgNodeId] = {} | |||||||||||||||||||||||||
branch_open_heads = defaultdict(list) | |||||||||||||||||||||||||
branch_closed_heads = defaultdict(list) | |||||||||||||||||||||||||
all_bookmarks = bookmarks.listbookmarks(repo) | |||||||||||||||||||||||||
for branch_name, heads in repo.branchmap().items(): | |||||||||||||||||||||||||
# Sort the heads by node id since it's stable and doesn't depend on local | |||||||||||||||||||||||||
# topology like cloning order. | |||||||||||||||||||||||||
for head in sorted(heads): | |||||||||||||||||||||||||
head = repo[head] | |||||||||||||||||||||||||
if head.rev() in ignored: | |||||||||||||||||||||||||
# This revision or one of its ancestors is corrupted, ignore it | |||||||||||||||||||||||||
continue | |||||||||||||||||||||||||
node_id = head.node() | |||||||||||||||||||||||||
if head.closesbranch(): | |||||||||||||||||||||||||
branch_closed_heads[branch_name].append(node_id) | |||||||||||||||||||||||||
else: | |||||||||||||||||||||||||
if not branch_tips.get(branch_name): | |||||||||||||||||||||||||
branch_tips[branch_name] = node_id | |||||||||||||||||||||||||
Not Done Inline Actions — olasd: I would suggest moving the assignment of the "branch tip" inside the heads loop, rather than only considering the "first" head. I'm also a bit surprised that mercurial doesn't provide a consistent sorting of the heads. How would it pick which head the next commit is made on?
Done Inline Actions — Alphare: This is probably simpler, yep.
> I'm also a bit surprised that mercurial doesn't provide a consistent sorting of the heads.
Mercurial's sorting is consistent per repository. If you re-clone a repository, the order of branches or heads isn't guaranteed, since there are multiple valid topologies of any given repository, and one of them might be more suitable for the current wire protocol, or be chosen arbitrarily.
Done Inline Actions — Alphare:
> How would it pick which head the next commit is made on?
Sorry, I haven't answered that question, but I'm not sure I understand it, since the next commit is applied on top of whichever head you are currently on.
Not Done Inline Actions — olasd: Ack. I'm still a bit surprised that the "tipmost head" of a branch depends on the order of operations rather than on an actual property of the changeset, but if that's the way it is...
Done Inline Actions — Alphare: Yeah, `tip` is an approximation of "what is most recent is probably the thing you want", but it is kind of hellish if you think about it too much. Mercurial discourages you from having multiple heads on a branch, and we (Octobus, with Heptapod) are even stricter than this.
branch_open_heads[branch_name].append(node_id) | |||||||||||||||||||||||||
# The default revision is where the "@" bookmark is, or failing that the tip of the | |||||||||||||||||||||||||
# `default` branch. For our purposes we're trying to find a branch tip to alias to, | |||||||||||||||||||||||||
# so only return those if they are branch tips, otherwise don't bother. | |||||||||||||||||||||||||
default_rev_alias = None | |||||||||||||||||||||||||
Not Done Inline Actions — olasd: Do we really need to generate a fake "default" branch here (or worse, erase an existing one)? I'd suggest returning a "default rev pointer", in addition to the lists of branches, bookmarks, etc., which would point either to the @ bookmark or to the tip of the default branch.
Done Inline Actions — Alphare: I should have thought about this :)
at_bookmark = all_bookmarks.get(b"@") | |||||||||||||||||||||||||
if at_bookmark is not None: | |||||||||||||||||||||||||
bookmark_at_branch = repo[at_bookmark].branch() | |||||||||||||||||||||||||
if branch_tips.get(bookmark_at_branch) is not None: | |||||||||||||||||||||||||
default_rev_alias = b"bookmarks/@" | |||||||||||||||||||||||||
if default_rev_alias is None and branch_tips.get(b"default") is not None: | |||||||||||||||||||||||||
default_rev_alias = b"branch-tip/default" | |||||||||||||||||||||||||
branches_with_one_head = set() | |||||||||||||||||||||||||
for branch, heads in branch_open_heads.items(): | |||||||||||||||||||||||||
if len(heads) == 1: | |||||||||||||||||||||||||
branches_with_one_head.add(branch) | |||||||||||||||||||||||||
# The most common case is one head per branch. Simplifying this means we have | |||||||||||||||||||||||||
# less duplicate data, because open heads are the same as open branch tips. | |||||||||||||||||||||||||
# We don't do more complex deduplication, this is just a simple optimization. | |||||||||||||||||||||||||
for branch in branches_with_one_head: | |||||||||||||||||||||||||
del branch_open_heads[branch] | |||||||||||||||||||||||||
return BranchingInfo( | |||||||||||||||||||||||||
tips=branch_tips, | |||||||||||||||||||||||||
Not Done Inline Actions — olasd: I think I would rather return a snapshot with no default (`HEAD`) branch than pick one arbitrarily. What would mercurial itself do if I were to clone a repo with neither a default branch nor an @ bookmark?
Done Inline Actions — Alphare:
> I think I would rather return a snapshot with no default (HEAD) branch than pick one arbitrarily.
If that's an option, I'm happy to oblige.
It looks like it takes the tip of the first branch it sees in the branchmap:
$ hg init no-default
$ cd no-default
$ hg branch stable
$ touch a
$ hg commit -Adm "Initial"
$ hg branches
stable
$ cd ..
$ hg clone no-default no-default-clone
updating to branch stable
1 files updated, 0 files merged, 0 files removed, 0 files unresolved
Done Inline Actions — Alphare: I found the documentation:
Mercurial will update the working directory to the first applicable revision from this list:
a) null if -U or the source repository has no changesets
b) if -u . and the source repository is local, the first parent of the source repository's working directory
c) the changeset specified with -u (if a branch name, this means the latest head of that branch)
d) the changeset specified with -r
e) the tipmost head specified with -b
f) the tipmost head specified with the url#branch source syntax
g) the revision marked with the '@' bookmark, if present
h) the tipmost head of the default branch
i) tip
Not Done Inline Actions — olasd: And what you're saying is that the "tipmost head" for a given branch depends on the local order of operations on the repository? (Sorry for being slow, I'm just trying to make sure we use the most faithful representation.)
Done Inline Actions — Alphare: That is correct.
Not Done Inline Actions — olasd: Then I'm fine with you just implementing g), h) and i), and generating a snapshot with no HEAD if none of these yields a reference to a changeset.
bookmarks=all_bookmarks, | |||||||||||||||||||||||||
open_heads=branch_open_heads, | |||||||||||||||||||||||||
closed_heads=branch_closed_heads, | |||||||||||||||||||||||||
default_branch_alias=default_rev_alias, | |||||||||||||||||||||||||
) | |||||||||||||||||||||||||
Not Done Inline Actions — olasd: Shouldn't we instead remove all the members of `branch_open_heads` that have a single value? (This would avoid generating branch-heads/foo/0 for all branches when only one of them has multiple open heads.)
Done Inline Actions — Alphare: Yep, probably better.
class CloneTimeout(Exception): | class CloneTimeout(Exception): | ||||||||||||||||||||||||
pass | pass | ||||||||||||||||||||||||
class CloneFailure(Exception): | class CloneFailure(Exception): | ||||||||||||||||||||||||
pass | pass | ||||||||||||||||||||||||
Done Inline Actions — olasd: If you apply my other change, these lists should already be sorted.
def _clone_task(src: str, dest: str, errors: Queue) -> None: | def _clone_task(src: str, dest: str, errors: Queue) -> None: | ||||||||||||||||||||||||
"""Clone task to run in a subprocess. | """Clone task to run in a subprocess. | ||||||||||||||||||||||||
Args: | Args: | ||||||||||||||||||||||||
src: clone source | src: clone source | ||||||||||||||||||||||||
dest: clone destination | dest: clone destination | ||||||||||||||||||||||||
▲ Show 20 Lines • Show All 47 Lines • Show Last 20 Lines |
same here, s/blacklist/ignored/g