Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/svn/replay.py
Show First 20 Lines • Show All 355 Lines • ▼ Show 20 Lines | |||||
@dataclass | @dataclass | ||||
class DirState: | class DirState: | ||||
"""Persists some directory states (eg. externals) across revisions while | """Persists some directory states (eg. externals) across revisions while | ||||
replaying them.""" | replaying them.""" | ||||
externals: Dict[str, List[ExternalDefinition]] = field(default_factory=dict) | externals: Dict[str, List[ExternalDefinition]] = field(default_factory=dict) | ||||
"""Map a path in the directory to a list of (external_url, revision, relative_url) | """Map a path in the directory to a list of (external_url, revision, relative_url) | ||||
targeting it""" | targeting it""" | ||||
externals_paths: Set[bytes] = field(default_factory=set) | |||||
"""Keep track of all external paths reachable from the directory""" | |||||
class DirEditor: | class DirEditor: | ||||
"""Directory Editor in charge of updating directory hashes computation. | """Directory Editor in charge of updating directory hashes computation. | ||||
This implementation includes empty folder in the hash computation. | This implementation includes empty folder in the hash computation. | ||||
""" | """ | ||||
Show All 36 Lines | def remove_child(self, path: bytes) -> None: | ||||
This function takes also care of removing the link between the | This function takes also care of removing the link between the | ||||
child and the parent. | child and the parent. | ||||
Args: | Args: | ||||
path: to remove from the current objects. | path: to remove from the current objects. | ||||
""" | """ | ||||
try: | if path in self.directory: | ||||
entry_removed = self.directory[path] | entry_removed = self.directory[path] | ||||
except KeyError: | |||||
entry_removed = None | |||||
else: | |||||
del self.directory[path] | del self.directory[path] | ||||
fpath = os.path.join(self.rootpath, path) | fpath = os.path.join(self.rootpath, path) | ||||
if isinstance(entry_removed, from_disk.Directory): | if isinstance(entry_removed, from_disk.Directory): | ||||
shutil.rmtree(fpath) | shutil.rmtree(fpath) | ||||
else: | else: | ||||
os.remove(fpath) | os.remove(fpath) | ||||
# when deleting a directory ensure to remove any svn property for the | # when deleting a directory ensure to remove any svn property for the | ||||
▲ Show 20 Lines • Show All 108 Lines • ▼ Show 20 Lines | def change_prop(self, key: str, value: str) -> None: | ||||
for path in externals.keys(): | for path in externals.keys(): | ||||
self.remove_external_path(os.fsencode(path)) | self.remove_external_path(os.fsencode(path)) | ||||
self.dir_states[self.path].externals = {} | self.dir_states[self.path].externals = {} | ||||
def delete_entry(self, path: str, revision: int) -> None: | def delete_entry(self, path: str, revision: int) -> None: | ||||
"""Remove a path.""" | """Remove a path.""" | ||||
path_bytes = os.fsencode(path) | path_bytes = os.fsencode(path) | ||||
if path_bytes not in self.editor.external_paths: | |||||
fullpath = os.path.join(self.rootpath, path_bytes) | fullpath = os.path.join(self.rootpath, path_bytes) | ||||
if os.path.isdir(fullpath): | |||||
# remove all external paths associated to the removed directory | |||||
# (we cannot simply remove a root external directory as externals | |||||
# paths associated to ancestor directories can overlap) | |||||
for external_path in self.dir_states[path_bytes].externals_paths: | |||||
self.remove_external_path( | |||||
external_path, | |||||
root_path=path_bytes, | |||||
remove_subpaths=False, | |||||
force=True, | |||||
) | |||||
if path_bytes not in self.editor.external_paths: | |||||
self.file_states.pop(fullpath, None) | self.file_states.pop(fullpath, None) | ||||
self.remove_child(path_bytes) | self.remove_child(path_bytes) | ||||
elif os.path.isdir(fullpath): | |||||
# versioned and external paths can overlap so we need to iterate on | |||||
# all subpaths to check which ones to remove | |||||
for root, dirs, files in os.walk(fullpath): | |||||
for p in chain(dirs, files): | |||||
full_repo_path = os.path.join(root, p) | |||||
repo_path = full_repo_path.replace(self.rootpath + b"/", b"") | |||||
if repo_path not in self.editor.external_paths: | |||||
self.file_states.pop(full_repo_path, None) | |||||
self.remove_child(repo_path) | |||||
def close(self): | def close(self): | ||||
"""Function called when we finish processing a repository. | """Function called when we finish processing a repository. | ||||
SVN external definitions are processed by it. | SVN external definitions are processed by it. | ||||
""" | """ | ||||
prev_externals = self.dir_states[self.path].externals | prev_externals = self.dir_states[self.path].externals | ||||
▲ Show 20 Lines • Show All 152 Lines • ▼ Show 20 Lines | ) -> None: | ||||
self.editor.valid_externals[dest_fullpath] = ( | self.editor.valid_externals[dest_fullpath] = ( | ||||
external_url, | external_url, | ||||
relative_url, | relative_url, | ||||
) | ) | ||||
# copy exported path to reconstructed filesystem | # copy exported path to reconstructed filesystem | ||||
fullpath = os.path.join(self.rootpath, dest_fullpath) | fullpath = os.path.join(self.rootpath, dest_fullpath) | ||||
# update from_disk model and store external paths | |||||
self.editor.external_paths[dest_fullpath] += 1 | |||||
if os.path.isfile(temp_path): | if os.path.isfile(temp_path): | ||||
if os.path.islink(fullpath): | if os.path.islink(fullpath): | ||||
# remove destination file if it is a link | # remove destination file if it is a link | ||||
os.remove(fullpath) | os.remove(fullpath) | ||||
shutil.copy(os.fsdecode(temp_path), os.fsdecode(fullpath)) | shutil.copy(os.fsdecode(temp_path), os.fsdecode(fullpath)) | ||||
self.directory[dest_fullpath] = from_disk.Content.from_file( | self.directory[dest_fullpath] = from_disk.Content.from_file( | ||||
path=fullpath | path=fullpath | ||||
) | ) | ||||
Show All 19 Lines | ) -> None: | ||||
) | ) | ||||
# TODO: replace code above by the line below once we use Python >= 3.8 in production # noqa | # TODO: replace code above by the line below once we use Python >= 3.8 in production # noqa | ||||
# shutil.copytree(temp_path, fullpath, symlinks=True, dirs_exist_ok=True) # noqa | # shutil.copytree(temp_path, fullpath, symlinks=True, dirs_exist_ok=True) # noqa | ||||
self.directory[dest_fullpath] = from_disk.Directory.from_disk( | self.directory[dest_fullpath] = from_disk.Directory.from_disk( | ||||
path=fullpath | path=fullpath | ||||
) | ) | ||||
# update set of external paths reachable from the directory | |||||
external_paths = set() | external_paths = set() | ||||
for root, dirs, files in os.walk(fullpath): | dest_path_part = dest_path.split(b"/") | ||||
for i in range(1, len(dest_path_part) + 1): | |||||
external_paths.add(b"/".join(dest_path_part[:i])) | |||||
for root, dirs, files in os.walk(temp_path): | |||||
external_paths.update( | external_paths.update( | ||||
[ | [ | ||||
os.path.join(root.replace(self.rootpath + b"/", b""), p) | os.path.join( | ||||
dest_path, | |||||
os.path.join(root, p).replace(temp_path, b"").strip(b"/"), | |||||
) | |||||
for p in chain(dirs, files) | for p in chain(dirs, files) | ||||
] | ] | ||||
) | ) | ||||
self.dir_states[self.path].externals_paths.update(external_paths) | |||||
for external_path in external_paths: | for external_path in external_paths: | ||||
self.editor.external_paths[external_path] += 1 | self.editor.external_paths[os.path.join(self.path, external_path)] += 1 | ||||
# ensure hash update for the directory with externals set | # ensure hash update for the directory with externals set | ||||
self.directory[self.path].update_hash(force=True) | self.directory[self.path].update_hash(force=True) | ||||
def remove_external_path( | def remove_external_path( | ||||
self, external_path: bytes, remove_subpaths: bool = True, force: bool = False | self, | ||||
external_path: bytes, | |||||
remove_subpaths: bool = True, | |||||
force: bool = False, | |||||
root_path: Optional[bytes] = None, | |||||
) -> None: | ) -> None: | ||||
"""Remove a previously exported SVN external path from | """Remove a previously exported SVN external path from | ||||
the reconstructed filesystem. | the reconstructed filesystem. | ||||
""" | """ | ||||
fullpath = os.path.join(self.path, external_path) | path = root_path if root_path else self.path | ||||
fullpath = os.path.join(path, external_path) | |||||
# decrement number of references for external path when we really remove it | # decrement number of references for external path when we really remove it | ||||
# (when remove_subpaths is False, we just cleanup the external path before | # (when remove_subpaths is False, we just cleanup the external path before | ||||
# copying exported paths in it) | # copying exported paths in it) | ||||
if fullpath in self.editor.external_paths and remove_subpaths: | if force or (fullpath in self.editor.external_paths and remove_subpaths): | ||||
self.editor.external_paths[fullpath] -= 1 | self.editor.external_paths[fullpath] -= 1 | ||||
if ( | if ( | ||||
force | fullpath in self.editor.external_paths | ||||
or fullpath in self.editor.external_paths | |||||
and self.editor.external_paths[fullpath] == 0 | and self.editor.external_paths[fullpath] == 0 | ||||
): | ): | ||||
self.remove_child(fullpath) | self.remove_child(fullpath) | ||||
self.editor.external_paths.pop(fullpath, None) | self.editor.external_paths.pop(fullpath, None) | ||||
self.editor.valid_externals.pop(fullpath, None) | self.editor.valid_externals.pop(fullpath, None) | ||||
for path in list(self.editor.external_paths): | for path in list(self.editor.external_paths): | ||||
if path.startswith(fullpath + b"/"): | if path.startswith(fullpath + b"/"): | ||||
self.editor.external_paths[path] -= 1 | self.editor.external_paths[path] -= 1 | ||||
if self.editor.external_paths[path] == 0: | if self.editor.external_paths[path] == 0: | ||||
self.editor.external_paths.pop(path) | self.editor.external_paths.pop(path) | ||||
if remove_subpaths: | if remove_subpaths: | ||||
subpath_split = external_path.split(b"/")[:-1] | subpath_split = fullpath.split(b"/")[:-1] | ||||
for i in reversed(range(1, len(subpath_split) + 1)): | for i in reversed(range(1, len(subpath_split) + 1)): | ||||
# delete external sub-directory only if it is not versioned | # delete external sub-directory only if it is not versioned | ||||
subpath = os.path.join(self.path, b"/".join(subpath_split[0:i])) | subpath = b"/".join(subpath_split[0:i]) | ||||
try: | try: | ||||
self.svnrepo.client.info( | self.svnrepo.client.info( | ||||
svn_urljoin(self.svnrepo.remote_url, os.fsdecode(subpath)), | svn_urljoin(self.svnrepo.remote_url, os.fsdecode(subpath)), | ||||
peg_revision=self.editor.revnum, | peg_revision=self.editor.revnum, | ||||
revision=self.editor.revnum, | revision=self.editor.revnum, | ||||
) | ) | ||||
except SubversionException: | except SubversionException: | ||||
self.remove_child(subpath) | self.remove_child(subpath) | ||||
▲ Show 20 Lines • Show All 212 Lines • Show Last 20 Lines |