Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/from_disk.py
# Copyright (C) 2020-2021 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import deque | from collections import deque | ||||
from datetime import datetime | from datetime import datetime | ||||
import os | import os | ||||
from shutil import rmtree | from shutil import rmtree | ||||
from tempfile import mkdtemp | from tempfile import mkdtemp | ||||
from typing import Deque, Dict, List, Optional, Tuple, TypeVar, Union | from typing import Deque, Dict, List, Optional, Tuple, TypeVar, Union | ||||
from swh.loader.core.loader import BaseLoader | from swh.loader.core.loader import BaseLoader | ||||
from swh.loader.core.utils import clean_dangling_folders | from swh.loader.core.utils import clean_dangling_folders | ||||
from swh.loader.mercurial.utils import parse_visit_date | from swh.loader.mercurial.utils import parse_visit_date | ||||
from swh.model.from_disk import Content, DentryPerms, Directory | from swh.model.from_disk import Content, DentryPerms, Directory | ||||
from swh.model.hashutil import MultiHash, hash_to_bytehex, hash_to_bytes | from swh.model.hashutil import hash_to_bytehex, hash_to_bytes | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
ObjectType, | ObjectType, | ||||
Origin, | Origin, | ||||
Person, | Person, | ||||
Release, | Release, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
Sha1Git, | Sha1Git, | ||||
▲ Show 20 Lines • Show All 462 Lines • ▼ Show 20 Lines | def store_content(self, rev_ctx: hgutil.BaseContext, file_path: bytes) -> Content: | ||||
perms = FLAG_PERMS[file_ctx.flags()] | perms = FLAG_PERMS[file_ctx.flags()] | ||||
# Key is file_nodeid + perms because permissions does not participate | # Key is file_nodeid + perms because permissions does not participate | ||||
# in content hash in hg while it is the case in swh. | # in content hash in hg while it is the case in swh. | ||||
cache_key = (file_nodeid, perms) | cache_key = (file_nodeid, perms) | ||||
sha1_git = self._content_hash_cache.get(cache_key) | sha1_git = self._content_hash_cache.get(cache_key) | ||||
if sha1_git is not None: | if sha1_git is None: | ||||
return Content({"sha1_git": sha1_git, "perms": perms}) | |||||
data = file_ctx.data() | data = file_ctx.data() | ||||
content_data = MultiHash.from_data(data).digest() | content = ModelContent.from_data(data) | ||||
content_data["length"] = len(data) | |||||
content_data["perms"] = perms | self.storage.content_add([content]) | ||||
content_data["data"] = data | |||||
content_data["status"] = "visible" | |||||
content = Content(content_data) | |||||
model = content.to_model() | |||||
if isinstance(model, ModelContent): | |||||
self.storage.content_add([model]) | |||||
else: | |||||
raise ValueError( | |||||
f"{file_path!r} at rev {hg_nodeid.hex()!r} " | |||||
"produced {type(model)!r} instead of {ModelContent!r}" | |||||
) | |||||
self._content_hash_cache[cache_key] = content.hash | sha1_git = content.sha1_git | ||||
self._content_hash_cache[cache_key] = sha1_git | |||||
# Here we make sure to return only necessary data. | # Here we make sure to return only necessary data. | ||||
return Content({"sha1_git": content.hash, "perms": perms}) | return Content({"sha1_git": sha1_git, "perms": perms}) | ||||
def store_directories(self, rev_ctx: hgutil.BaseContext) -> Sha1Git: | def store_directories(self, rev_ctx: hgutil.BaseContext) -> Sha1Git: | ||||
"""Store a revision directories given its hg nodeid. | """Store a revision directories given its hg nodeid. | ||||
Mercurial as no directory as in git. A Git like tree must be build | Mercurial as no directory as in git. A Git like tree must be build | ||||
from file paths to obtain each directory hash. | from file paths to obtain each directory hash. | ||||
Args: | Args: | ||||
▲ Show 20 Lines • Show All 101 Lines • Show Last 20 Lines |