Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/from_disk.py
Show First 20 Lines • Show All 222 Lines • ▼ Show 20 Lines | def _set_recorded_state(self, latest_snapshot: Snapshot) -> None: | ||||
and adds them to `self._latest_heads`. | and adds them to `self._latest_heads`. | ||||
Also looks up the currently saved releases ("tags" in Mercurial speak). | Also looks up the currently saved releases ("tags" in Mercurial speak). | ||||
The tags are all listed for easy comparison at the end, while only the latest | The tags are all listed for easy comparison at the end, while only the latest | ||||
heads are needed for revisions. | heads are needed for revisions. | ||||
""" | """ | ||||
heads = [] | heads = [] | ||||
tags = [] | tags = [] | ||||
for branch in latest_snapshot.branches.values(): | for name, branch in latest_snapshot.branches.items(): | ||||
if branch.target_type == TargetType.REVISION: | if branch.target_type == TargetType.REVISION: | ||||
heads.append(branch.target) | heads.append(branch.target) | ||||
elif branch.target_type == TargetType.RELEASE: | elif branch.target_type == TargetType.RELEASE: | ||||
tags.append(branch.target) | tags.append(branch.target) | ||||
self._latest_heads.extend( | self._latest_heads.extend( | ||||
extid.extid for extid in self._get_extids_for_targets(heads) | extid.extid for extid in self._get_extids_for_targets(heads) | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines | def get_hg_revs_to_load(self) -> Union[HgFilteredSet, HgSpanSet]: | ||||
existing_heads = [] # heads that still exist in the repository | existing_heads = [] # heads that still exist in the repository | ||||
for hg_nodeid in self._latest_heads: | for hg_nodeid in self._latest_heads: | ||||
try: | try: | ||||
rev = repo[hg_nodeid].rev() | rev = repo[hg_nodeid].rev() | ||||
existing_heads.append(rev) | existing_heads.append(rev) | ||||
except KeyError: # the node does not exist anymore | except KeyError: # the node does not exist anymore | ||||
pass | pass | ||||
# Mercurial can have more than one head per branch, so we need to exclude | |||||
# local heads that have already been loaded as revisions but don't | |||||
# correspond to a SnapshotBranch. | |||||
# In the future, if the SnapshotBranch model evolves to support multiple | |||||
# heads per branch (or anything else that fixes this issue) this might | |||||
# become useless. | |||||
extids = self.storage.extid_get_from_extid(EXTID_TYPE, repo.heads()) | |||||
known_heads = {extid.extid for extid in extids} | |||||
existing_heads.extend([repo[head].rev() for head in known_heads]) | |||||
# select revisions that are not ancestors of heads | # select revisions that are not ancestors of heads | ||||
# and not the heads themselves | # and not the heads themselves | ||||
new_revs = repo.revs("not ::(%ld)", existing_heads) | new_revs = repo.revs("not ::(%ld)", existing_heads) | ||||
if new_revs: | if new_revs: | ||||
self.log.info("New revisions found: %d", len(new_revs)) | self.log.info("New revisions found: %d", len(new_revs)) | ||||
return new_revs | return new_revs | ||||
else: | else: | ||||
return repo.revs("all()") | return repo.revs("all()") | ||||
def store_data(self): | def store_data(self): | ||||
"""Store fetched data in the database.""" | """Store fetched data in the database.""" | ||||
revs = self.get_hg_revs_to_load() | revs = self.get_hg_revs_to_load() | ||||
if not revs: | if not revs: | ||||
self._load_status = "uneventful" | self._load_status = "uneventful" | ||||
return | return | ||||
assert self._repo is not None | assert self._repo is not None | ||||
repo = self._repo | repo = self._repo | ||||
blacklisted_revs: List[int] = [] | blacklisted_revs: Set[int] = set() | ||||
for rev in revs: | for rev in revs: | ||||
if rev in blacklisted_revs: | if rev in blacklisted_revs: | ||||
continue | continue | ||||
try: | try: | ||||
self.store_revision(repo[rev]) | self.store_revision(repo[rev]) | ||||
except CorruptedRevision as e: | except CorruptedRevision as e: | ||||
self._visit_status = "partial" | self._visit_status = "partial" | ||||
self.log.warning("Corrupted revision %s", e) | self.log.warning("Corrupted revision %s", e) | ||||
descendents = repo.revs("(%ld)::", [rev]) | descendents = repo.revs("(%ld)::", [rev]) | ||||
blacklisted_revs.extend(descendents) | blacklisted_revs.update(descendents) | ||||
branch_by_hg_nodeid: Dict[HgNodeId, bytes] = { | if len(blacklisted_revs) == len(revs): | ||||
hg_nodeid: name for name, hg_nodeid in hgutil.branches(repo).items() | # The repository is completely broken, nothing can be loaded | ||||
} | self._load_status = "uneventful" | ||||
return | |||||
tips, heads, closed_heads, bookmarks = hgutil.branches_info( | |||||
repo, blacklisted_revs | |||||
) | |||||
tags_by_name: Dict[bytes, HgNodeId] = repo.tags() | tags_by_name: Dict[bytes, HgNodeId] = repo.tags() | ||||
snapshot_branches: Dict[bytes, SnapshotBranch] = {} | snapshot_branches: Dict[bytes, SnapshotBranch] = {} | ||||
for tag_name, hg_nodeid in tags_by_name.items(): | for tag_name, hg_nodeid in tags_by_name.items(): | ||||
if tag_name == b"tip": | if tag_name == b"tip": | ||||
# tip is listed in the tags by the mercurial api | # `tip` is listed in the tags by the Mercurial API but it's not a tag | ||||
# but it's not a tag defined by the user in `.hgtags` | # defined by the user in `.hgtags`. | ||||
continue | continue | ||||
if hg_nodeid not in self._saved_tags: | if hg_nodeid not in self._saved_tags: | ||||
revision_sha1git = self.get_revision_id_from_hg_nodeid(hg_nodeid) | target = self.get_revision_id_from_hg_nodeid(hg_nodeid) | ||||
snapshot_branches[tag_name] = SnapshotBranch( | snapshot_branches[tag_name] = SnapshotBranch( | ||||
target=self.store_release(tag_name, revision_sha1git), | target=self.store_release(tag_name, target), | ||||
target_type=TargetType.RELEASE, | target_type=TargetType.RELEASE, | ||||
) | ) | ||||
for hg_nodeid, revision_sha1git in self._revision_nodeid_to_sha1git.items(): | for branch_name, node_id in tips.items(): | ||||
if hg_nodeid in branch_by_hg_nodeid: | name = b"refs/hg/branch-tip/%s" % branch_name | ||||
name = branch_by_hg_nodeid[hg_nodeid] | target = self.get_revision_id_from_hg_nodeid(node_id) | ||||
snapshot_branches[name] = SnapshotBranch( | snapshot_branches[name] = SnapshotBranch( | ||||
target=revision_sha1git, target_type=TargetType.REVISION, | target=target, target_type=TargetType.REVISION | ||||
) | ) | ||||
# The tip is mapped to `HEAD` to match | for bookmark_name, node_id in bookmarks.items(): | ||||
# the historical implementation | name = b"refs/hg/bookmarks/%s" % bookmark_name | ||||
if hg_nodeid == tags_by_name[b"tip"]: | target = self.get_revision_id_from_hg_nodeid(node_id) | ||||
snapshot_branches[b"HEAD"] = SnapshotBranch( | snapshot_branches[name] = SnapshotBranch( | ||||
target=name, target_type=TargetType.ALIAS, | target=target, target_type=TargetType.REVISION | ||||
) | ) | ||||
for branch_name, branch_heads in heads.items(): | |||||
for index, head in enumerate(branch_heads): | |||||
index = str(index).encode() | |||||
name = b"refs/hg/branch-heads/%s/%s" % (branch_name, index) | |||||
target = self.get_revision_id_from_hg_nodeid(head) | |||||
snapshot_branches[name] = SnapshotBranch( | |||||
target=target, target_type=TargetType.REVISION | |||||
) | |||||
for branch_name, closed_heads in closed_heads.items(): | |||||
for index, head in enumerate(closed_heads): | |||||
index = str(index).encode() | |||||
name = b"refs/hg/branch-closed-heads/%s/%s" % (branch_name, index) | |||||
target = self.get_revision_id_from_hg_nodeid(head) | |||||
snapshot_branches[name] = SnapshotBranch( | |||||
target=target, target_type=TargetType.REVISION | |||||
) | |||||
# `tip` is mapped to `HEAD` to match the historical implementation. | |||||
tip_node_id = tags_by_name[b"tip"] | |||||
branch_name = repo[tip_node_id].branch() | |||||
target = b"refs/hg/branch-tip/%s" % branch_name | |||||
snapshot_branches[b"HEAD"] = SnapshotBranch( | |||||
target=target, target_type=TargetType.ALIAS, | |||||
) | |||||
snapshot = Snapshot(branches=snapshot_branches) | snapshot = Snapshot(branches=snapshot_branches) | ||||
self.storage.snapshot_add([snapshot]) | self.storage.snapshot_add([snapshot]) | ||||
self.flush() | self.flush() | ||||
self.loaded_snapshot_id = snapshot.id | self.loaded_snapshot_id = snapshot.id | ||||
def load_status(self) -> Dict[str, str]: | def load_status(self) -> Dict[str, str]: | ||||
"""Detailed loading status. | """Detailed loading status. | ||||
▲ Show 20 Lines • Show All 315 Lines • Show Last 20 Lines |