Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/from_disk.py
# Copyright (C) 2020-2021 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import deque | from collections import deque | ||||
from datetime import datetime | from datetime import datetime | ||||
import os | import os | ||||
from shutil import rmtree | from shutil import rmtree | ||||
from tempfile import mkdtemp | from tempfile import mkdtemp | ||||
from typing import Deque, Dict, List, Optional, Tuple, TypeVar, Union | from typing import Deque, Dict, List, Optional, Tuple, TypeVar, Union | ||||
from swh.loader.core.loader import BaseLoader | from swh.loader.core.loader import BaseLoader | ||||
from swh.loader.core.utils import clean_dangling_folders | from swh.loader.core.utils import clean_dangling_folders | ||||
from swh.loader.mercurial.utils import parse_visit_date | from swh.loader.mercurial.utils import get_minimum_env, parse_visit_date | ||||
from swh.model import identifiers | from swh.model import identifiers | ||||
from swh.model.from_disk import Content, DentryPerms, Directory | from swh.model.from_disk import Content, DentryPerms, Directory | ||||
from swh.model.hashutil import hash_to_bytehex | from swh.model.hashutil import hash_to_bytehex | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
ExtID, | ExtID, | ||||
ObjectType, | ObjectType, | ||||
Origin, | Origin, | ||||
Person, | Person, | ||||
▲ Show 20 Lines • Show All 138 Lines • ▼ Show 20 Lines | ): | ||||
# hg node id of the latest snapshot branch heads | # hg node id of the latest snapshot branch heads | ||||
# used to find what are the new revisions since last snapshot | # used to find what are the new revisions since last snapshot | ||||
self._latest_heads: List[bytes] = [] | self._latest_heads: List[bytes] = [] | ||||
self._load_status = "eventful" | self._load_status = "eventful" | ||||
# If set, will override the default value | # If set, will override the default value | ||||
self._visit_status = None | self._visit_status = None | ||||
self.old_environ = os.environ.copy() | |||||
os.environ.clear() | |||||
os.environ.update(get_minimum_env()) | |||||
def pre_cleanup(self) -> None:
    """Check the temporary directory for dangling data and clean it up.

    Runs as a first step and should do its best to avoid raising issues.
    """
    # Keyword options forwarded untouched to the shared cleanup helper.
    cleanup_options = {
        "pattern_check": TEMPORARY_DIR_PREFIX_PATTERN,
        "log": self.log,
    }
    clean_dangling_folders(self._temp_directory, **cleanup_options)
def cleanup(self) -> None:
    """Last step executed by the loader.

    Puts the process environment back to the content of
    ``self.old_environ``, then deletes the repository directory when one is
    set and still present on disk.
    """
    # Empty the live environment first so that no variable outside the
    # saved mapping survives the restore.
    environ = os.environ
    environ.clear()
    environ.update(self.old_environ)

    repo_directory = self._repo_directory
    if not repo_directory or not os.path.exists(repo_directory):
        # Nothing on disk to remove.
        return

    self.log.debug(f"Cleanup up repository {self._repo_directory}")
    rmtree(repo_directory)
def prepare_origin_visit(self) -> None: | def prepare_origin_visit(self) -> None: | ||||
"""First step executed by the loader to prepare origin and visit | """First step executed by the loader to prepare origin and visit | ||||
references. Set/update self.origin, and | references. Set/update self.origin, and | ||||
optionally self.origin_url, self.visit_date. | optionally self.origin_url, self.visit_date. | ||||
▲ Show 20 Lines • Show All 467 Lines • Show Last 20 Lines |