Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/svn/loader.py
# Copyright (C) 2015-2022 The Software Heritage developers | # Copyright (C) 2015-2022 The Software Heritage developers | |||||||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | |||||||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | |||||||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | |||||||||
"""Loader in charge of injecting either new or existing svn mirrors to | """Loader in charge of injecting either new or existing svn mirrors to | |||||||||
swh-storage. | swh-storage. | |||||||||
""" | """ | |||||||||
from datetime import datetime | from datetime import datetime | |||||||||
import difflib | ||||||||||
import os | import os | |||||||||
import pty | import pty | |||||||||
import re | import re | |||||||||
import shutil | import shutil | |||||||||
from subprocess import PIPE, Popen | from subprocess import PIPE, Popen | |||||||||
import tempfile | import tempfile | |||||||||
from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple | from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple | |||||||||
Show All 17 Lines | ||||||||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | |||||||||
from . import converters | from . import converters | |||||||||
from .exception import SvnLoaderHistoryAltered, SvnLoaderUneventful | from .exception import SvnLoaderHistoryAltered, SvnLoaderUneventful | |||||||||
from .utils import ( | from .utils import ( | |||||||||
OutputStream, | OutputStream, | |||||||||
init_svn_repo_from_archive_dump, | init_svn_repo_from_archive_dump, | |||||||||
init_svn_repo_from_dump, | init_svn_repo_from_dump, | |||||||||
svn_urljoin, | ||||||||||
) | ) | |||||||||
DEFAULT_BRANCH = b"HEAD" | DEFAULT_BRANCH = b"HEAD" | |||||||||
TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.svn." | TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.svn." | |||||||||
SUBVERSION_ERROR = re.compile(r".*(E[0-9]{6}):.*") | SUBVERSION_ERROR = re.compile(r".*(E[0-9]{6}):.*") | |||||||||
SUBVERSION_NOT_FOUND = "E170013" | SUBVERSION_NOT_FOUND = "E170013" | |||||||||
▲ Show 20 Lines • Show All 274 Lines • ▼ Show 20 Lines | ) -> None: | |||||||||
path, | path, | |||||||||
) | ) | |||||||||
elif dir[path].hash != checked_dir[path].hash: | elif dir[path].hash != checked_dir[path].hash: | |||||||||
self.log.debug( | self.log.debug( | |||||||||
"%s with path %s has different hash in reconstructed repository filesystem", # noqa | "%s with path %s has different hash in reconstructed repository filesystem", # noqa | |||||||||
obj.object_type, # type: ignore | obj.object_type, # type: ignore | |||||||||
path, | path, | |||||||||
) | ) | |||||||||
if obj.object_type == "content": # type: ignore | ||||||||||
ardumont: Given the following instructions seem expensive (svn export, diff computation, ...) you… | ||||||||||
ardumontUnsubmitted Not Done Inline Actions
I think that's ^ ardumont: I think that's ^ | ||||||||||
anlambertAuthorUnsubmitted Done Inline ActionsThat code path is only executed when setting the debug parameter of the loader to True so we are good here. anlambert: That code path is only executed when setting the `debug` parameter of the loader to `True` so… | ||||||||||
self.log.debug( | ||||||||||
"expected sha1: %s, actual sha1: %s", | ||||||||||
hashutil.hash_to_hex(checked_dir[path].data["sha1"]), | ||||||||||
hashutil.hash_to_hex(dir[path].data["sha1"]), | ||||||||||
) | ||||||||||
# compute and display diff between contents | ||||||||||
file_path = ( | ||||||||||
checked_dir[path] | ||||||||||
.data["path"] | ||||||||||
.replace(checked_dir.data["path"], b"") | ||||||||||
).decode() | ||||||||||
with tempfile.TemporaryDirectory() as tmpdir: | ||||||||||
export_path = os.path.join( | ||||||||||
tmpdir, os.path.basename(file_path) | ||||||||||
) | ||||||||||
assert self.svnrepo is not None | ||||||||||
self.svnrepo.export( | ||||||||||
url=svn_urljoin(self.svnrepo.remote_url, file_path), | ||||||||||
to=export_path, | ||||||||||
rev=rev, | ||||||||||
peg_rev=rev, | ||||||||||
ignore_keywords=True, | ||||||||||
overwrite=True, | ||||||||||
) | ||||||||||
with open(export_path, "rb") as exported_file, open( | ||||||||||
dir[path].data["path"], "rb" | ||||||||||
) as checkout_file: | ||||||||||
diff_lines = difflib.diff_bytes( | ||||||||||
difflib.unified_diff, | ||||||||||
exported_file.read().split(b"\n"), | ||||||||||
checkout_file.read().split(b"\n"), | ||||||||||
) | ||||||||||
self.log.debug( | ||||||||||
"below is diff between files:\n" | ||||||||||
+ os.fsdecode(b"\n".join(list(diff_lines)[2:])) | ||||||||||
) | ||||||||||
err = ( | err = ( | |||||||||
"Hash tree computation divergence detected at revision %s " | "Hash tree computation divergence detected at revision %s " | |||||||||
"(%s != %s), stopping!" | "(%s != %s), stopping!" | |||||||||
% ( | % ( | |||||||||
rev, | rev, | |||||||||
hashutil.hash_to_hex(dir_id), | hashutil.hash_to_hex(dir_id), | |||||||||
hashutil.hash_to_hex(checked_dir_id), | hashutil.hash_to_hex(checked_dir_id), | |||||||||
) | ) | |||||||||
▲ Show 20 Lines • Show All 516 Lines • Show Last 20 Lines |
Given that the following instructions seem expensive (svn export, diff computation, ...), you probably also want to check that the logging level is debug (in the conditional).
Otherwise, the diff computation ends up being unused.