Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/from_disk.py
Show First 20 Lines • Show All 298 Lines • ▼ Show 20 Lines | def get_hg_revs_to_load(self) -> Union[HgFilteredSet, HgSpanSet]: | ||||
existing_heads = [] # heads that still exist in the repository | existing_heads = [] # heads that still exist in the repository | ||||
for hg_nodeid in self._latest_heads: | for hg_nodeid in self._latest_heads: | ||||
try: | try: | ||||
rev = repo[hg_nodeid].rev() | rev = repo[hg_nodeid].rev() | ||||
existing_heads.append(rev) | existing_heads.append(rev) | ||||
except KeyError: # the node does not exist anymore | except KeyError: # the node does not exist anymore | ||||
pass | pass | ||||
# Mercurial can have more than one head per branch, so we need to exclude | |||||
# local heads that have already been loaded as revisions but don't | |||||
# correspond to a SnapshotBranch. | |||||
# In the future, if the SnapshotBranch model evolves to support multiple | |||||
# heads per branch (or anything else that fixes this issue) this might | |||||
# become useless. | |||||
extids = self.storage.extid_get_from_extid(EXTID_TYPE, repo.heads()) | |||||
known_heads = {extid.extid for extid in extids} | |||||
existing_heads.extend([repo[head].rev() for head in known_heads]) | |||||
# select revisions that are not ancestors of heads | # select revisions that are not ancestors of heads | ||||
vlorentz: What about this instead, to batch RPC calls?
```
extids = self.storage.extid_get_from_extid… | |||||
```
Done | Inline Actions | Alphare: Of course, what was I thinking! | |||||
# and not the heads themselves | # and not the heads themselves | ||||
new_revs = repo.revs("not ::(%ld)", existing_heads) | new_revs = repo.revs("not ::(%ld)", existing_heads) | ||||
if new_revs: | if new_revs: | ||||
self.log.info("New revisions found: %d", len(new_revs)) | self.log.info("New revisions found: %d", len(new_revs)) | ||||
return new_revs | return new_revs | ||||
else: | else: | ||||
return repo.revs("all()") | return repo.revs("all()") | ||||
▲ Show 20 Lines • Show All 90 Lines • ▼ Show 20 Lines | def get_revision_id_from_hg_nodeid(self, hg_nodeid: HgNodeId) -> Sha1Git: | ||||
from_cache = self._revision_nodeid_to_sha1git.get(hg_nodeid) | from_cache = self._revision_nodeid_to_sha1git.get(hg_nodeid) | ||||
if from_cache is not None: | if from_cache is not None: | ||||
return from_cache | return from_cache | ||||
# The parent was not loaded in this run, get it from storage | # The parent was not loaded in this run, get it from storage | ||||
from_storage = self.storage.extid_get_from_extid(EXTID_TYPE, ids=[hg_nodeid]) | from_storage = self.storage.extid_get_from_extid(EXTID_TYPE, ids=[hg_nodeid]) | ||||
msg = "Expected 1 match from storage for hg node %r, got %d" | msg = "Expected 1 match from storage for hg node %r, got %d" | ||||
assert len(from_storage) == 1, msg % (hg_nodeid, len(from_storage)) | assert len(from_storage) == 1, msg % (hg_nodeid.hex(), len(from_storage)) | ||||
return from_storage[0].target.object_id | return from_storage[0].target.object_id | ||||
def get_revision_parents(self, rev_ctx: hgutil.BaseContext) -> Tuple[Sha1Git, ...]: | def get_revision_parents(self, rev_ctx: hgutil.BaseContext) -> Tuple[Sha1Git, ...]: | ||||
"""Return the git sha1 of the parent revisions. | """Return the git sha1 of the parent revisions. | ||||
Args: | Args: | ||||
hg_nodeid: the hg nodeid of the revision. | hg_nodeid: the hg nodeid of the revision. | ||||
▲ Show 20 Lines • Show All 272 Lines • Show Last 20 Lines |
What about this instead, to batch RPC calls?
(not tested)