Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/from_disk.py
Show First 20 Lines • Show All 526 Lines • ▼ Show 20 Lines | def store_content(self, rev_ctx: hgutil.BaseContext, file_path: bytes) -> Content: | ||||
except hgutil.LookupError: | except hgutil.LookupError: | ||||
# TODO | # TODO | ||||
# Raising CorruptedRevision avoids crashing the whole loading | # Raising CorruptedRevision avoids crashing the whole loading | ||||
# but can lead to a lot of missing revisions. | # but can lead to a lot of missing revisions. | ||||
# SkippedContent could be used but needs the actual content to calculate its id. | # SkippedContent could be used but needs the actual content to calculate its id. | ||||
# Maybe the hg_nodeid can be used instead. | # Maybe the hg_nodeid can be used instead. | ||||
# Another option could be to just ignore the missing content. | # Another option could be to just ignore the missing content. | ||||
# This point is left to future commits. | # This point is left to future commits. | ||||
# Check for other uses to apply the same logic there. | |||||
raise CorruptedRevision(hg_nodeid) | raise CorruptedRevision(hg_nodeid) | ||||
perms = FLAG_PERMS[file_ctx.flags()] | perms = FLAG_PERMS[file_ctx.flags()] | ||||
# Key is file_nodeid + perms because permissions do not participate | # Key is file_nodeid + perms because permissions do not participate | ||||
# in content hash in hg while it is the case in swh. | # in content hash in hg while it is the case in swh. | ||||
cache_key = (file_nodeid, perms) | cache_key = (file_nodeid, perms) | ||||
sha1_git = self._content_hash_cache.get(cache_key) | sha1_git = self._content_hash_cache.get(cache_key) | ||||
if sha1_git is None: | if sha1_git is None: | ||||
try: | |||||
data = file_ctx.data() | data = file_ctx.data() | ||||
except hgutil.error.RevlogError: | |||||
# TODO | |||||
# See above use of `CorruptedRevision` | |||||
raise CorruptedRevision(hg_nodeid) | |||||
content = ModelContent.from_data(data) | content = ModelContent.from_data(data) | ||||
self.storage.content_add([content]) | self.storage.content_add([content]) | ||||
sha1_git = content.sha1_git | sha1_git = content.sha1_git | ||||
self._content_hash_cache[cache_key] = sha1_git | self._content_hash_cache[cache_key] = sha1_git | ||||
Show All 11 Lines | def store_directories(self, rev_ctx: hgutil.BaseContext) -> Sha1Git: | ||||
Returns: | Returns: | ||||
the sha1_git of the top level directory. | the sha1_git of the top level directory. | ||||
""" | """ | ||||
repo: hgutil.Repository = self._repo # mypy can't infer that repo is not None | repo: hgutil.Repository = self._repo # mypy can't infer that repo is not None | ||||
prev_ctx = repo[self._last_hg_nodeid] | prev_ctx = repo[self._last_hg_nodeid] | ||||
# TODO maybe do diff on parents | # TODO maybe do diff on parents | ||||
try: | |||||
status = prev_ctx.status(rev_ctx) | status = prev_ctx.status(rev_ctx) | ||||
except hgutil.error.LookupError: | |||||
raise CorruptedRevision(rev_ctx.node()) | |||||
for file_path in status.removed: | for file_path in status.removed: | ||||
try: | |||||
del self._last_root[file_path] | del self._last_root[file_path] | ||||
except KeyError: | |||||
raise CorruptedRevision(rev_ctx.node()) | |||||
for file_path in status.added: | for file_path in status.added: | ||||
content = self.store_content(rev_ctx, file_path) | content = self.store_content(rev_ctx, file_path) | ||||
self._last_root[file_path] = content | self._last_root[file_path] = content | ||||
for file_path in status.modified: | for file_path in status.modified: | ||||
content = self.store_content(rev_ctx, file_path) | content = self.store_content(rev_ctx, file_path) | ||||
self._last_root[file_path] = content | self._last_root[file_path] = content | ||||
▲ Show 20 Lines • Show All 80 Lines • Show Last 20 Lines |