Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/svn/replay.py
# Copyright (C) 2016-2022 The Software Heritage developers | # Copyright (C) 2016-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
"""Remote Access client to svn server. | """Remote Access client to svn server. | ||||
""" | """ | ||||
from __future__ import annotations | from __future__ import annotations | ||||
import codecs | import codecs | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from dataclasses import dataclass, field | from dataclasses import dataclass, field | ||||
from distutils.dir_util import copy_tree | |||||
from itertools import chain | from itertools import chain | ||||
import logging | import logging | ||||
import os | import os | ||||
import shutil | import shutil | ||||
import tempfile | import tempfile | ||||
from typing import ( | from typing import ( | ||||
TYPE_CHECKING, | TYPE_CHECKING, | ||||
Any, | Any, | ||||
▲ Show 20 Lines • Show All 618 Lines • ▼ Show 20 Lines | ) -> None: | ||||
logger.debug("Exporting external %s to path %s", external_url, path) | logger.debug("Exporting external %s to path %s", external_url, path) | ||||
self.svnrepo.client.export( | self.svnrepo.client.export( | ||||
external_url.rstrip("/"), | external_url.rstrip("/"), | ||||
to=temp_path, | to=temp_path, | ||||
peg_rev=revision, | peg_rev=revision, | ||||
ignore_keywords=True, | ignore_keywords=True, | ||||
) | ) | ||||
self.editor.externals_cache[external] = temp_path | self.editor.externals_cache[external] = temp_path | ||||
self.editor.valid_externals[dest_fullpath] = ( | |||||
external_url, | |||||
relative_url, | |||||
) | |||||
except SubversionException as se: | except SubversionException as se: | ||||
# external no longer available (404) | # external no longer available (404) | ||||
logger.debug(se) | logger.debug(se) | ||||
self.editor.dead_externals.add(external_url) | self.editor.dead_externals.add(external_url) | ||||
else: | else: | ||||
temp_path = self.editor.externals_cache[external] | temp_path = self.editor.externals_cache[external] | ||||
# subversion export will always create the subdirectories of the external | # subversion export will always create the subdirectories of the external | ||||
# path regardless the validity of the remote URL | # path regardless the validity of the remote URL | ||||
dest_path_split = dest_path.split(b"/") | dest_path_split = dest_path.split(b"/") | ||||
current_path = self.path | current_path = self.path | ||||
self.add_directory(os.fsdecode(current_path)) | self.add_directory(os.fsdecode(current_path)) | ||||
for subpath in dest_path_split[:-1]: | for subpath in dest_path_split[:-1]: | ||||
current_path = os.path.join(current_path, subpath) | current_path = os.path.join(current_path, subpath) | ||||
self.add_directory(os.fsdecode(current_path)) | self.add_directory(os.fsdecode(current_path)) | ||||
if os.path.exists(temp_path): | if os.path.exists(temp_path): | ||||
# external successfully exported | # external successfully exported | ||||
# remove previous path in from_disk model | # remove previous path in from_disk model | ||||
self.remove_child(dest_fullpath) | self.remove_external_path(dest_path, remove_subpaths=False) | ||||
# mark external as valid | |||||
self.editor.valid_externals[dest_fullpath] = ( | |||||
external_url, | |||||
relative_url, | |||||
) | |||||
# copy exported path to reconstructed filesystem | # copy exported path to reconstructed filesystem | ||||
fullpath = os.path.join(self.rootpath, dest_fullpath) | fullpath = os.path.join(self.rootpath, dest_fullpath) | ||||
self.editor.external_paths.add(dest_fullpath) | |||||
self.editor.modified_paths.add(dest_fullpath) | |||||
# update from_disk model and store external paths | # update from_disk model and store external paths | ||||
self.editor.external_paths.add(dest_fullpath) | self.editor.external_paths.add(dest_fullpath) | ||||
self.editor.modified_paths.add(dest_fullpath) | |||||
if os.path.isfile(temp_path): | if os.path.isfile(temp_path): | ||||
if os.path.islink(fullpath): | |||||
# remove destination file if it is a link | |||||
os.remove(fullpath) | |||||
shutil.copy(os.fsdecode(temp_path), os.fsdecode(fullpath)) | shutil.copy(os.fsdecode(temp_path), os.fsdecode(fullpath)) | ||||
self.directory[dest_fullpath] = from_disk.Content.from_file( | self.directory[dest_fullpath] = from_disk.Content.from_file( | ||||
path=fullpath | path=fullpath | ||||
) | ) | ||||
else: | else: | ||||
shutil.copytree( | self.add_directory(os.fsdecode(dest_fullpath)) | ||||
os.fsdecode(temp_path), os.fsdecode(fullpath), symlinks=True | |||||
# copy_tree needs sub-directories to exist in destination | |||||
for root, dirs, files in os.walk(temp_path): | |||||
for dir in dirs: | |||||
vlorentz: just in case names clash | |||||
Done Inline Actionsright, thanks ! anlambert: right, thanks ! | |||||
Done Inline ActionsIn fact temp_path is a path generated by the tempfile module so we are good here. anlambert: In fact `temp_path` is a path generated by the `tempfile` module so we are good here. | |||||
Not Done Inline ActionsI'm aware, but there is still a very low likelihood of it happening vlorentz: I'm aware, but there is still a very low likelihood of it happening | |||||
subdir = os.path.join(root, dir).replace(temp_path + b"/", b"") | |||||
self.add_directory( | |||||
os.fsdecode(os.path.join(dest_fullpath, subdir)) | |||||
) | ) | ||||
copy_tree( | |||||
os.fsdecode(temp_path), | |||||
os.fsdecode(fullpath), | |||||
preserve_symlinks=True, | |||||
) | |||||
# TODO: replace code above by the line below once we use Python >= 3.8 in production # noqa | |||||
# shutil.copytree(temp_path, fullpath, symlinks=True, dirs_exist_ok=True) # noqa | |||||
self.directory[dest_fullpath] = from_disk.Directory.from_disk( | self.directory[dest_fullpath] = from_disk.Directory.from_disk( | ||||
path=fullpath | path=fullpath | ||||
) | ) | ||||
external_paths = set() | external_paths = set() | ||||
for root, dirs, files in os.walk(fullpath): | for root, dirs, files in os.walk(fullpath): | ||||
external_paths.update( | external_paths.update( | ||||
[ | [ | ||||
os.path.join(root.replace(self.rootpath + b"/", b""), p) | os.path.join(root.replace(self.rootpath + b"/", b""), p) | ||||
for p in chain(dirs, files) | for p in chain(dirs, files) | ||||
] | ] | ||||
) | ) | ||||
self.editor.external_paths.update(external_paths) | self.editor.external_paths.update(external_paths) | ||||
self.editor.modified_paths.update(external_paths) | self.editor.modified_paths.update(external_paths) | ||||
# ensure hash update for the directory with externals set | # ensure hash update for the directory with externals set | ||||
self.directory[self.path].update_hash(force=True) | self.directory[self.path].update_hash(force=True) | ||||
def remove_external_path(self, external_path: bytes) -> None: | def remove_external_path(self, external_path: bytes, remove_subpaths=True) -> None: | ||||
"""Remove a previously exported SVN external path from | """Remove a previously exported SVN external path from | ||||
the reconstruted filesystem. | the reconstruted filesystem. | ||||
""" | """ | ||||
fullpath = os.path.join(self.path, external_path) | fullpath = os.path.join(self.path, external_path) | ||||
self.remove_child(fullpath) | self.remove_child(fullpath) | ||||
self.editor.external_paths.discard(fullpath) | self.editor.external_paths.discard(fullpath) | ||||
self.editor.valid_externals.pop(fullpath, None) | self.editor.valid_externals.pop(fullpath, None) | ||||
for path in list(self.editor.external_paths): | for path in list(self.editor.external_paths): | ||||
if path.startswith(fullpath + b"/"): | if path.startswith(fullpath + b"/"): | ||||
self.editor.external_paths.remove(path) | self.editor.external_paths.remove(path) | ||||
if remove_subpaths: | |||||
subpath_split = external_path.split(b"/")[:-1] | subpath_split = external_path.split(b"/")[:-1] | ||||
for i in reversed(range(1, len(subpath_split) + 1)): | for i in reversed(range(1, len(subpath_split) + 1)): | ||||
# delete external sub-directory only if it is not versioned | # delete external sub-directory only if it is not versioned | ||||
subpath = os.path.join(self.path, b"/".join(subpath_split[0:i])) | subpath = os.path.join(self.path, b"/".join(subpath_split[0:i])) | ||||
try: | try: | ||||
self.svnrepo.client.info( | self.svnrepo.client.info( | ||||
svn_urljoin(self.svnrepo.remote_url, os.fsdecode(subpath)), | svn_urljoin(self.svnrepo.remote_url, os.fsdecode(subpath)), | ||||
peg_revision=self.editor.revnum, | peg_revision=self.editor.revnum, | ||||
revision=self.editor.revnum, | revision=self.editor.revnum, | ||||
) | ) | ||||
except SubversionException: | except SubversionException: | ||||
self.remove_child(subpath) | self.remove_child(subpath) | ||||
else: | else: | ||||
break | break | ||||
try: | try: | ||||
# externals can overlap with versioned files so we must restore | # externals can overlap with versioned files so we must restore | ||||
# them after removing the path above | # them after removing the path above | ||||
dest_path = os.path.join(self.rootpath, fullpath) | dest_path = os.path.join(self.rootpath, fullpath) | ||||
self.svnrepo.client.export( | self.svnrepo.client.export( | ||||
svn_urljoin(self.svnrepo.remote_url, os.fsdecode(fullpath)), | svn_urljoin(self.svnrepo.remote_url, os.fsdecode(fullpath)), | ||||
to=dest_path, | to=dest_path, | ||||
▲ Show 20 Lines • Show All 208 Lines • Show Last 20 Lines |
just in case names clash