Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/from_disk.py
Show All 9 Lines | |||||
from tempfile import mkdtemp | from tempfile import mkdtemp | ||||
from typing import Deque, Dict, List, Optional, Tuple, TypeVar, Union | from typing import Deque, Dict, List, Optional, Tuple, TypeVar, Union | ||||
from swh.loader.core.loader import BaseLoader | from swh.loader.core.loader import BaseLoader | ||||
from swh.loader.core.utils import clean_dangling_folders | from swh.loader.core.utils import clean_dangling_folders | ||||
from swh.loader.mercurial.utils import parse_visit_date | from swh.loader.mercurial.utils import parse_visit_date | ||||
from swh.model import identifiers | from swh.model import identifiers | ||||
from swh.model.from_disk import Content, DentryPerms, Directory | from swh.model.from_disk import Content, DentryPerms, Directory | ||||
from swh.model.hashutil import hash_to_bytehex, hash_to_bytes | from swh.model.hashutil import hash_to_bytehex | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
ExtID, | ExtID, | ||||
ObjectType, | ObjectType, | ||||
Origin, | Origin, | ||||
Person, | Person, | ||||
Release, | Release, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
▲ Show 20 Lines • Show All 174 Lines • ▼ Show 20 Lines | def prepare(self) -> None: | ||||
self._latest_heads = [] | self._latest_heads = [] | ||||
latest_snapshot = snapshot_get_latest(self.storage, self.origin_url) | latest_snapshot = snapshot_get_latest(self.storage, self.origin_url) | ||||
if latest_snapshot: | if latest_snapshot: | ||||
self._set_latest_heads(latest_snapshot) | self._set_latest_heads(latest_snapshot) | ||||
def _set_latest_heads(self, latest_snapshot: Snapshot) -> None: | def _set_latest_heads(self, latest_snapshot: Snapshot) -> None: | ||||
""" | """ | ||||
Looks up the nodeid for all revisions in the snapshot, and adds them to | Looks up the nodeid for all revisions in the snapshot via extid_get_from_target, | ||||
self._latest_heads. | and adds them to self._latest_heads. | ||||
This works in two steps: | |||||
1. Query the revisions with extid_get_from_target, to find nodeids from | |||||
revision ids, using the new ExtID architecture | |||||
2. For all revisions that were not found this way, fetch the revision | |||||
and look for the nodeid in its metadata. | |||||
This is a temporary process. When we are done migrating away from revision | |||||
metadata, step 2 will be removed. | |||||
""" | """ | ||||
# TODO: add support for releases | # TODO: add support for releases | ||||
snapshot_branches = [ | snapshot_branches = [ | ||||
branch.target | branch.target | ||||
for branch in latest_snapshot.branches.values() | for branch in latest_snapshot.branches.values() | ||||
if branch.target_type == TargetType.REVISION | if branch.target_type == TargetType.REVISION | ||||
] | ] | ||||
Show All 14 Lines | def _set_latest_heads(self, latest_snapshot: Snapshot) -> None: | ||||
extid | extid | ||||
for extid in extids | for extid in extids | ||||
if extid.target.object_id not in revisions_missing | if extid.target.object_id not in revisions_missing | ||||
] | ] | ||||
# Add the found nodeids to self._latest_heads | # Add the found nodeids to self._latest_heads | ||||
self._latest_heads.extend(extid.extid for extid in extids) | self._latest_heads.extend(extid.extid for extid in extids) | ||||
# For each revision without a nodeid, get the revision metadata | |||||
# to see if it is found there. | |||||
found_revisions = {extid.target.object_id for extid in extids if extid} | |||||
revisions_without_extid = list(set(snapshot_branches) - found_revisions) | |||||
self._latest_heads.extend( | |||||
hash_to_bytes(revision.metadata["node"]) | |||||
for revision in self.storage.revision_get(revisions_without_extid) | |||||
if revision and revision.metadata | |||||
) | |||||
def fetch_data(self) -> bool: | def fetch_data(self) -> bool: | ||||
"""Fetch the data from the source the loader is currently loading | """Fetch the data from the source the loader is currently loading | ||||
Returns: | Returns: | ||||
a value that is interpreted as a boolean. If True, fetch_data needs | a value that is interpreted as a boolean. If True, fetch_data needs | ||||
to be called again to complete loading. | to be called again to complete loading. | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 97 Lines • ▼ Show 20 Lines | def store_data(self): | ||||
# The tip is mapped to `HEAD` to match | # The tip is mapped to `HEAD` to match | ||||
# the historical implementation | # the historical implementation | ||||
if hg_nodeid == tags_by_name[b"tip"]: | if hg_nodeid == tags_by_name[b"tip"]: | ||||
snapshot_branches[b"HEAD"] = SnapshotBranch( | snapshot_branches[b"HEAD"] = SnapshotBranch( | ||||
target=name, target_type=TargetType.ALIAS, | target=name, target_type=TargetType.ALIAS, | ||||
) | ) | ||||
# TODO: do not write an ExtID if we got this branch from an ExtID that | if hg_nodeid not in self._latest_heads: | ||||
# already exists. | |||||
# When we are done migrating away from revision metadata, this will | |||||
# be as simple as checking if the target is in self._latest_heads | |||||
revision_swhid = identifiers.CoreSWHID( | revision_swhid = identifiers.CoreSWHID( | ||||
object_type=identifiers.ObjectType.REVISION, object_id=revision_sha1git | object_type=identifiers.ObjectType.REVISION, | ||||
object_id=revision_sha1git, | |||||
) | ) | ||||
extids.append( | extids.append( | ||||
ExtID(extid_type=EXTID_TYPE, extid=hg_nodeid, target=revision_swhid) | ExtID(extid_type=EXTID_TYPE, extid=hg_nodeid, target=revision_swhid) | ||||
) | ) | ||||
snapshot = Snapshot(branches=snapshot_branches) | snapshot = Snapshot(branches=snapshot_branches) | ||||
self.storage.snapshot_add([snapshot]) | self.storage.snapshot_add([snapshot]) | ||||
self.storage.extid_add(extids) | self.storage.extid_add(extids) | ||||
self.flush() | self.flush() | ||||
self.loaded_snapshot_id = snapshot.id | self.loaded_snapshot_id = snapshot.id | ||||
▲ Show 20 Lines • Show All 88 Lines • ▼ Show 20 Lines | def store_revision(self, rev_ctx: hgutil.BaseContext) -> None: | ||||
revision = Revision( | revision = Revision( | ||||
author=author, | author=author, | ||||
date=rev_date, | date=rev_date, | ||||
committer=author, | committer=author, | ||||
committer_date=rev_date, | committer_date=rev_date, | ||||
type=RevisionType.MERCURIAL, | type=RevisionType.MERCURIAL, | ||||
directory=root_sha1git, | directory=root_sha1git, | ||||
message=rev_ctx.description(), | message=rev_ctx.description(), | ||||
metadata={"node": hg_nodeid.hex()}, | |||||
extra_headers=tuple(extra_headers), | extra_headers=tuple(extra_headers), | ||||
synthetic=False, | synthetic=False, | ||||
parents=self.get_revision_parents(rev_ctx), | parents=self.get_revision_parents(rev_ctx), | ||||
) | ) | ||||
self._revision_nodeid_to_sha1git[hg_nodeid] = revision.id | self._revision_nodeid_to_sha1git[hg_nodeid] = revision.id | ||||
self.storage.revision_add([revision]) | self.storage.revision_add([revision]) | ||||
▲ Show 20 Lines • Show All 184 Lines • Show Last 20 Lines |