Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/from_disk.py
Show First 20 Lines • Show All 136 Lines • ▼ Show 20 Lines | ): | ||||
self._temp_directory = temp_directory | self._temp_directory = temp_directory | ||||
self._clone_timeout = clone_timeout_seconds | self._clone_timeout = clone_timeout_seconds | ||||
self.origin_url = url | self.origin_url = url | ||||
self.visit_date = visit_date | self.visit_date = visit_date | ||||
self.directory = directory | self.directory = directory | ||||
self._repo: Optional[hgutil.Repository] = None | self._repo: Optional[hgutil.Repository] = None | ||||
self._revision_nodeid_to_swhid: Dict[HgNodeId, Sha1Git] = {} | self._revision_nodeid_to_sha1git: Dict[HgNodeId, Sha1Git] = {} | ||||
self._repo_directory: Optional[str] = None | self._repo_directory: Optional[str] = None | ||||
# keeps the last processed hg nodeid | # keeps the last processed hg nodeid | ||||
# it is used for differential tree update by store_directories | # it is used for differential tree update by store_directories | ||||
# NULLID is the parent of the first revision | # NULLID is the parent of the first revision | ||||
self._last_hg_nodeid = hgutil.NULLID | self._last_hg_nodeid = hgutil.NULLID | ||||
# keeps the last revision tree | # keeps the last revision tree | ||||
▲ Show 20 Lines • Show All 189 Lines • ▼ Show 20 Lines | def store_data(self): | ||||
tags_by_hg_nodeid: Dict[HgNodeId, bytes] = { | tags_by_hg_nodeid: Dict[HgNodeId, bytes] = { | ||||
hg_nodeid: name for name, hg_nodeid in tags_by_name.items() | hg_nodeid: name for name, hg_nodeid in tags_by_name.items() | ||||
} | } | ||||
snapshot_branches: Dict[bytes, SnapshotBranch] = {} | snapshot_branches: Dict[bytes, SnapshotBranch] = {} | ||||
extids = [] | extids = [] | ||||
for hg_nodeid, revision_swhid in self._revision_nodeid_to_swhid.items(): | for hg_nodeid, revision_sha1git in self._revision_nodeid_to_sha1git.items(): | ||||
tag_name = tags_by_hg_nodeid.get(hg_nodeid) | tag_name = tags_by_hg_nodeid.get(hg_nodeid) | ||||
# tip is listed in the tags by the mercurial api | # tip is listed in the tags by the mercurial api | ||||
# but it's not a tag defined by the user in `.hgtags` | # but it's not a tag defined by the user in `.hgtags` | ||||
if tag_name and tag_name != b"tip": | if tag_name and tag_name != b"tip": | ||||
snapshot_branches[tag_name] = SnapshotBranch( | snapshot_branches[tag_name] = SnapshotBranch( | ||||
target=self.store_release(tag_name, revision_swhid), | target=self.store_release(tag_name, revision_sha1git), | ||||
target_type=TargetType.RELEASE, | target_type=TargetType.RELEASE, | ||||
) | ) | ||||
if hg_nodeid in branch_by_hg_nodeid: | if hg_nodeid in branch_by_hg_nodeid: | ||||
name = branch_by_hg_nodeid[hg_nodeid] | name = branch_by_hg_nodeid[hg_nodeid] | ||||
snapshot_branches[name] = SnapshotBranch( | snapshot_branches[name] = SnapshotBranch( | ||||
target=revision_swhid, target_type=TargetType.REVISION, | target=revision_sha1git, target_type=TargetType.REVISION, | ||||
) | ) | ||||
# The tip is mapped to `HEAD` to match | # The tip is mapped to `HEAD` to match | ||||
# the historical implementation | # the historical implementation | ||||
if hg_nodeid == tags_by_name[b"tip"]: | if hg_nodeid == tags_by_name[b"tip"]: | ||||
snapshot_branches[b"HEAD"] = SnapshotBranch( | snapshot_branches[b"HEAD"] = SnapshotBranch( | ||||
target=name, target_type=TargetType.ALIAS, | target=name, target_type=TargetType.ALIAS, | ||||
) | ) | ||||
# TODO: do not write an ExtID if we got this branch from an ExtID that | # TODO: do not write an ExtID if we got this branch from an ExtID that | ||||
# already exists. | # already exists. | ||||
# When we are done migrating away from revision metadata, this will | # When we are done migrating away from revision metadata, this will | ||||
# be as simple as checking if the target is in self._latest_heads | # be as simple as checking if the target is in self._latest_heads | ||||
extids.append( | revision_swhid = identifiers.CoreSWHID( | ||||
ExtID( | object_type=identifiers.ObjectType.REVISION, object_id=revision_sha1git | ||||
extid_type=EXTID_TYPE, | |||||
extid=hg_nodeid, | |||||
target=identifiers.CoreSWHID( | |||||
object_type=identifiers.ObjectType.REVISION, | |||||
object_id=revision_swhid, | |||||
), | |||||
) | ) | ||||
extids.append( | |||||
ExtID(extid_type=EXTID_TYPE, extid=hg_nodeid, target=revision_swhid) | |||||
) | ) | ||||
snapshot = Snapshot(branches=snapshot_branches) | snapshot = Snapshot(branches=snapshot_branches) | ||||
self.storage.snapshot_add([snapshot]) | self.storage.snapshot_add([snapshot]) | ||||
self.storage.extid_add(extids) | self.storage.extid_add(extids) | ||||
self.flush() | self.flush() | ||||
Show All 14 Lines | class HgLoaderFromDisk(BaseLoader): | ||||
def visit_status(self) -> str:
    """Return the status to record for this visit.

    A status stored in ``self._visit_status`` takes precedence, which lets
    a partial load override the default; otherwise the parent class's
    ``visit_status()`` result is returned.
    """
    overridden_status = self._visit_status
    if overridden_status is None:
        return super().visit_status()
    return overridden_status
def get_revision_id_from_hg_nodeid(self, hg_nodeid: HgNodeId) -> Sha1Git:
    """Look up the sha1_git recorded for a Mercurial revision.

    Args:
        hg_nodeid: the hg nodeid of the revision.

    Returns:
        the sha1_git stored for that nodeid in
        ``self._revision_nodeid_to_sha1git``.

    Raises:
        KeyError: if no entry exists for ``hg_nodeid``.
    """
    known_revisions = self._revision_nodeid_to_sha1git
    return known_revisions[hg_nodeid]
def get_revision_parents(self, rev_ctx: hgutil.BaseContext) -> Tuple[Sha1Git, ...]:
    """Return the sha1_git of the parent revisions.

    Args:
        rev_ctx: the hg revision context whose parents are resolved.

    Returns:
        the sha1_git of each existing parent revision, in the order
        given by ``rev_ctx.parents()``; empty when there is none.
    """
    parent_nodeids = (parent_ctx.node() for parent_ctx in rev_ctx.parents())
    return tuple(
        self.get_revision_id_from_hg_nodeid(parent_nodeid)
        for parent_nodeid in parent_nodeids
        # nullid is the value of a parent that does not exist
        if parent_nodeid != hgutil.NULLID
    )
def store_revision(self, rev_ctx: hgutil.BaseContext) -> None: | def store_revision(self, rev_ctx: hgutil.BaseContext) -> None: | ||||
"""Store a revision given its hg nodeid. | """Store a revision given its hg nodeid. | ||||
Args: | Args: | ||||
rev_ctx: the hg revision context. | rev_ctx: the hg revision context. | ||||
Returns: | Returns: | ||||
the swhid of the stored revision. | the sha1_git of the stored revision. | ||||
""" | """ | ||||
hg_nodeid = rev_ctx.node() | hg_nodeid = rev_ctx.node() | ||||
root_swhid = self.store_directories(rev_ctx) | root_sha1git = self.store_directories(rev_ctx) | ||||
# `Person.from_fullname` is compatible with mercurial's freeform author | # `Person.from_fullname` is compatible with mercurial's freeform author | ||||
# as fullname is what is used in revision hash when available. | # as fullname is what is used in revision hash when available. | ||||
author = Person.from_fullname(rev_ctx.user()) | author = Person.from_fullname(rev_ctx.user()) | ||||
(timestamp, offset) = rev_ctx.date() | (timestamp, offset) = rev_ctx.date() | ||||
# TimestampWithTimezone.from_dict will change name | # TimestampWithTimezone.from_dict will change name | ||||
Show All 16 Lines | def store_revision(self, rev_ctx: hgutil.BaseContext) -> None: | ||||
extra_headers.append((key, value)) | extra_headers.append((key, value)) | ||||
revision = Revision( | revision = Revision( | ||||
author=author, | author=author, | ||||
date=rev_date, | date=rev_date, | ||||
committer=author, | committer=author, | ||||
committer_date=rev_date, | committer_date=rev_date, | ||||
type=RevisionType.MERCURIAL, | type=RevisionType.MERCURIAL, | ||||
directory=root_swhid, | directory=root_sha1git, | ||||
message=rev_ctx.description(), | message=rev_ctx.description(), | ||||
metadata={"node": hg_nodeid.hex()}, | metadata={"node": hg_nodeid.hex()}, | ||||
extra_headers=tuple(extra_headers), | extra_headers=tuple(extra_headers), | ||||
synthetic=False, | synthetic=False, | ||||
parents=self.get_revision_parents(rev_ctx), | parents=self.get_revision_parents(rev_ctx), | ||||
) | ) | ||||
self._revision_nodeid_to_swhid[hg_nodeid] = revision.id | self._revision_nodeid_to_sha1git[hg_nodeid] = revision.id | ||||
self.storage.revision_add([revision]) | self.storage.revision_add([revision]) | ||||
def store_release(self, name: bytes, target: Sha1Git) -> Sha1Git: | def store_release(self, name: bytes, target: Sha1Git) -> Sha1Git: | ||||
"""Store a release given its name and its target. | """Store a release given its name and its target. | ||||
A release corresponds to a user-defined tag in Mercurial. | A release corresponds to a user-defined tag in Mercurial. | ||||
The Mercurial API has a `tip` tag that must be ignored. | The Mercurial API has a `tip` tag that must be ignored. | ||||
Args: | Args: | ||||
name: name of the release. | name: name of the release. | ||||
target: swhid of the target revision. | target: sha1_git of the target revision. | ||||
Returns: | Returns: | ||||
the swhid of the stored release. | the sha1_git of the stored release. | ||||
""" | """ | ||||
release = Release( | release = Release( | ||||
name=name, | name=name, | ||||
target=target, | target=target, | ||||
target_type=ObjectType.REVISION, | target_type=ObjectType.REVISION, | ||||
message=None, | message=None, | ||||
metadata=None, | metadata=None, | ||||
synthetic=False, | synthetic=False, | ||||
Show All 11 Lines | def store_content(self, rev_ctx: hgutil.BaseContext, file_path: bytes) -> Content: | ||||
Content is a mix of file content at a given revision | Content is a mix of file content at a given revision | ||||
and its permissions found in the changeset's manifest. | and its permissions found in the changeset's manifest. | ||||
Args: | Args: | ||||
rev_ctx: the hg revision context. | rev_ctx: the hg revision context. | ||||
file_path: the hg path of the content. | file_path: the hg path of the content. | ||||
Returns: | Returns: | ||||
the swhid of the top level directory. | the sha1_git of the top level directory. | ||||
""" | """ | ||||
hg_nodeid = rev_ctx.node() | hg_nodeid = rev_ctx.node() | ||||
file_ctx = rev_ctx[file_path] | file_ctx = rev_ctx[file_path] | ||||
try: | try: | ||||
file_nodeid = file_ctx.filenode() | file_nodeid = file_ctx.filenode() | ||||
except hgutil.LookupError: | except hgutil.LookupError: | ||||
# TODO | # TODO | ||||
Show All 30 Lines | def store_directories(self, rev_ctx: hgutil.BaseContext) -> Sha1Git: | ||||
Mercurial has no directories as git does. A git-like tree must be built | Mercurial has no directories as git does. A git-like tree must be built | ||||
from file paths to obtain each directory hash. | from file paths to obtain each directory hash. | ||||
Args: | Args: | ||||
rev_ctx: the hg revision context. | rev_ctx: the hg revision context. | ||||
Returns: | Returns: | ||||
the swhid of the top level directory. | the sha1_git of the top level directory. | ||||
""" | """ | ||||
repo: hgutil.Repository = self._repo # mypy can't infer that repo is not None | repo: hgutil.Repository = self._repo # mypy can't infer that repo is not None | ||||
prev_ctx = repo[self._last_hg_nodeid] | prev_ctx = repo[self._last_hg_nodeid] | ||||
# TODO maybe do diff on parents | # TODO maybe do diff on parents | ||||
status = prev_ctx.status(rev_ctx) | status = prev_ctx.status(rev_ctx) | ||||
for file_path in status.removed: | for file_path in status.removed: | ||||
▲ Show 20 Lines • Show All 89 Lines • Show Last 20 Lines |