diff --git a/swh/loader/mercurial/identify.py b/swh/loader/mercurial/identify.py --- a/swh/loader/mercurial/identify.py +++ b/swh/loader/mercurial/identify.py @@ -1,11 +1,10 @@ -# Copyright (C) 2020 The Software Heritage developers +# Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from codecs import escape_decode # type: ignore import json -import os from pathlib import Path import re import subprocess @@ -15,6 +14,7 @@ # control import click +from swh.loader.mercurial.utils import get_minimum_env from swh.model.cli import identify_object from swh.model.hashutil import hash_to_bytehex from swh.model.identifiers import normalize_timestamp, swhid @@ -198,7 +198,7 @@ def _output(self, *args) -> bytes: """Return the outpout of a `hg` call.""" return subprocess.check_output( - ["hg", *args], cwd=self._root, env=self._get_env() + ["hg", *args], cwd=self._root, env=get_minimum_env() ) def _call(self, *args) -> None: @@ -208,18 +208,9 @@ cwd=self._root, stderr=subprocess.PIPE, stdout=subprocess.PIPE, - env=self._get_env(), + env=get_minimum_env(), ) - def _get_env(self) -> Dict[str, str]: - """Return the smallest viable environment for `hg` suprocesses""" - env = { - "PATH": os.environ["PATH"], - "HGPLAIN": "", # Tells Mercurial to disable output customization - "HGRCPATH": "", # Tells Mercurial to ignore config files - } - return env - def root(self) -> Path: """Return the root of the Mercurial repository.""" return self._root diff --git a/swh/loader/mercurial/loader.py b/swh/loader/mercurial/loader.py --- a/swh/loader/mercurial/loader.py +++ b/swh/loader/mercurial/loader.py @@ -34,6 +34,7 @@ from swh.loader.core.loader import DVCSLoader from swh.loader.core.utils import clean_dangling_folders from swh.loader.exception import NotFound +from swh.loader.mercurial.utils import 
get_minimum_env from swh.model import identifiers from swh.model.hashutil import ( DEFAULT_ALGORITHMS, @@ -136,6 +137,9 @@ self.heads: Dict[bytes, Any] = {} self.releases: Dict[bytes, Any] = {} self.last_snapshot_id: Optional[bytes] = None + self.old_environ = os.environ.copy() + os.environ.clear() + os.environ.update(get_minimum_env()) def pre_cleanup(self): """Cleanup potential dangling files from prior runs (e.g. OOM killed @@ -152,6 +156,8 @@ """Clean temporary working directory """ + os.environ.clear() + os.environ.update(self.old_environ) if self.bundle_path and os.path.exists(self.bundle_path): self.log.debug("Cleanup up working bundle %s" % self.bundle_path) os.unlink(self.bundle_path) diff --git a/swh/loader/mercurial/utils.py b/swh/loader/mercurial/utils.py --- a/swh/loader/mercurial/utils.py +++ b/swh/loader/mercurial/utils.py @@ -4,7 +4,8 @@ # See top-level LICENSE file for more information from datetime import datetime, timezone -from typing import Optional, Union +import os +from typing import Dict, Optional, Union from dateutil.parser import parse @@ -27,3 +28,17 @@ return parse(visit_date) raise ValueError(f"invalid visit date {visit_date!r}") + + +def get_minimum_env() -> Dict[str, str]: + """Return the smallest viable environment for `hg` subprocesses""" + env = { + "HGPLAIN": "", # Tells Mercurial to disable output customization + "HGRCPATH": "", # Tells Mercurial to ignore config files + } + path = os.environ.get("PATH") + if path: + # Sometimes (in tests for example), there is no PATH. An empty PATH could be + # interpreted differently than a lack of PATH by some programs. + env["PATH"] = path + return env