Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/identify.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from codecs import escape_decode # type: ignore | from codecs import escape_decode # type: ignore | ||||
import json | import json | ||||
import os | |||||
from pathlib import Path | from pathlib import Path | ||||
import re | import re | ||||
import subprocess | import subprocess | ||||
from typing import Any, Dict, Iterator, List, NamedTuple, Optional, Union | from typing import Any, Dict, Iterator, List, NamedTuple, Optional, Union | ||||
# WARNING: do not import unnecessary things here to keep cli startup time under | # WARNING: do not import unnecessary things here to keep cli startup time under | ||||
# control | # control | ||||
import click | import click | ||||
from swh.loader.mercurial.utils import get_minimum_env | |||||
from swh.model.cli import identify_object | from swh.model.cli import identify_object | ||||
from swh.model.hashutil import hash_to_bytehex | from swh.model.hashutil import hash_to_bytehex | ||||
from swh.model.identifiers import normalize_timestamp, swhid | from swh.model.identifiers import normalize_timestamp, swhid | ||||
from swh.model.model import RevisionType | from swh.model.model import RevisionType | ||||
TAG_PATTERN = re.compile(b"([0-9A-Fa-f]{40}) +(.+)") | TAG_PATTERN = re.compile(b"([0-9A-Fa-f]{40}) +(.+)") | ||||
▲ Show 20 Lines • Show All 167 Lines • ▼ Show 20 Lines | class Hg: | ||||
"""Provide methods to extract data from a Mercurial repository.""" | """Provide methods to extract data from a Mercurial repository.""" | ||||
def __init__(self, repository_root: Path) -> None: | def __init__(self, repository_root: Path) -> None: | ||||
self._root = repository_root | self._root = repository_root | ||||
def _output(self, *args) -> bytes: | def _output(self, *args) -> bytes: | ||||
"""Return the output of a `hg` call.""" | """Return the output of a `hg` call.""" | ||||
return subprocess.check_output( | return subprocess.check_output( | ||||
["hg", *args], cwd=self._root, env=self._get_env() | ["hg", *args], cwd=self._root, env=get_minimum_env() | ||||
) | ) | ||||
def _call(self, *args) -> None: | def _call(self, *args) -> None: | ||||
"""Perform a `hg` call.""" | """Perform a `hg` call.""" | ||||
subprocess.check_call( | subprocess.check_call( | ||||
["hg", *args], | ["hg", *args], | ||||
cwd=self._root, | cwd=self._root, | ||||
stderr=subprocess.PIPE, | stderr=subprocess.PIPE, | ||||
stdout=subprocess.PIPE, | stdout=subprocess.PIPE, | ||||
env=self._get_env(), | env=get_minimum_env(), | ||||
) | ) | ||||
def _get_env(self) -> Dict[str, str]: | |||||
"""Return the smallest viable environment for `hg` subprocesses""" | |||||
env = { | |||||
"PATH": os.environ["PATH"], | |||||
"HGPLAIN": "", # Tells Mercurial to disable output customization | |||||
"HGRCPATH": "", # Tells Mercurial to ignore config files | |||||
} | |||||
return env | |||||
def root(self) -> Path: | def root(self) -> Path: | ||||
"""Return the root of the Mercurial repository.""" | """Return the root of the Mercurial repository.""" | ||||
return self._root | return self._root | ||||
def log(self, rev: Optional[Union[bytes, str]] = None) -> List[HgRevision]: | def log(self, rev: Optional[Union[bytes, str]] = None) -> List[HgRevision]: | ||||
"""Return the specified revisions of the Mercurial repository. | """Return the specified revisions of the Mercurial repository. | ||||
Mercurial revsets are supported. (See `hg help revsets`) | Mercurial revsets are supported. (See `hg help revsets`) | ||||
▲ Show 20 Lines • Show All 324 Lines • Show Last 20 Lines |