Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/svn/ra.py
Show First 20 Lines • Show All 560 Lines • ▼ Show 20 Lines | def close(self): | ||||
if ( | if ( | ||||
path in prev_externals | path in prev_externals | ||||
and prev_externals[path] == external | and prev_externals[path] == external | ||||
and dest_fullpath in self.directory | and dest_fullpath in self.directory | ||||
): | ): | ||||
# external already exported, nothing to do | # external already exported, nothing to do | ||||
continue | continue | ||||
if external not in self.editor.externals_cache: | |||||
try: | try: | ||||
# try to export external in a temporary path, destination path could | # try to export external in a temporary path, destination path could | ||||
# be versioned and must be overridden only if the external URL is | # be versioned and must be overridden only if the external URL is | ||||
# still valid | # still valid | ||||
temp_dir = os.fsencode(tempfile.mkdtemp()) | temp_dir = os.fsencode( | ||||
tempfile.mkdtemp(dir=self.editor.externals_cache_dir) | |||||
) | |||||
temp_path = os.path.join(temp_dir, dest_path) | temp_path = os.path.join(temp_dir, dest_path) | ||||
os.makedirs(b"/".join(temp_path.split(b"/")[:-1]), exist_ok=True) | os.makedirs(b"/".join(temp_path.split(b"/")[:-1]), exist_ok=True) | ||||
if external_url not in self.editor.dead_externals: | if external_url not in self.editor.dead_externals: | ||||
logger.debug("Exporting external %s to path %s", external_url, path) | logger.debug( | ||||
"Exporting external %s to path %s", external_url, path | |||||
) | |||||
self.svnrepo.client.export( | self.svnrepo.client.export( | ||||
external_url.rstrip("/"), | external_url.rstrip("/"), | ||||
to=temp_path, | to=temp_path, | ||||
rev=revision, | rev=revision, | ||||
ignore_keywords=True, | ignore_keywords=True, | ||||
) | ) | ||||
self.editor.externals_cache[external] = temp_path | |||||
self.editor.valid_externals[dest_fullpath] = ( | self.editor.valid_externals[dest_fullpath] = ( | ||||
external_url, | external_url, | ||||
relative_url, | relative_url, | ||||
) | ) | ||||
except SubversionException as se: | except SubversionException as se: | ||||
# external no longer available (404) | # external no longer available (404) | ||||
logger.debug(se) | logger.debug(se) | ||||
self.editor.dead_externals.add(external_url) | self.editor.dead_externals.add(external_url) | ||||
else: | |||||
temp_path = self.editor.externals_cache[external] | |||||
# subversion export will always create the subdirectories of the external | # subversion export will always create the subdirectories of the external | ||||
# path regardless the validity of the remote URL | # path regardless the validity of the remote URL | ||||
dest_path_split = dest_path.split(b"/") | dest_path_split = dest_path.split(b"/") | ||||
current_path = self.path | current_path = self.path | ||||
self.add_directory(current_path) | self.add_directory(current_path) | ||||
for subpath in dest_path_split[:-1]: | for subpath in dest_path_split[:-1]: | ||||
current_path = os.path.join(current_path, subpath) | current_path = os.path.join(current_path, subpath) | ||||
self.add_directory(current_path) | self.add_directory(current_path) | ||||
if os.path.exists(temp_path): | if os.path.exists(temp_path): | ||||
# external successfully exported | # external successfully exported | ||||
# remove previous path in from_disk model | # remove previous path in from_disk model | ||||
self.remove_child(dest_fullpath) | self.remove_child(dest_fullpath) | ||||
# move exported path to reconstructed filesystem | # copy exported path to reconstructed filesystem | ||||
fullpath = os.path.join(self.rootpath, dest_fullpath) | fullpath = os.path.join(self.rootpath, dest_fullpath) | ||||
shutil.move(temp_path, fullpath) | |||||
# update from_disk model and store external paths | # update from_disk model and store external paths | ||||
self.editor.external_paths.add(dest_fullpath) | self.editor.external_paths.add(dest_fullpath) | ||||
if os.path.isfile(fullpath): | if os.path.isfile(temp_path): | ||||
shutil.copy(temp_path, fullpath) | |||||
self.directory[dest_fullpath] = from_disk.Content.from_file( | self.directory[dest_fullpath] = from_disk.Content.from_file( | ||||
path=fullpath | path=fullpath | ||||
) | ) | ||||
else: | else: | ||||
vlorentz: does the loader ever mutate files? if not, you can use `os.link` instead of `shutil.copy`, and… | |||||
shutil.copytree(temp_path, fullpath, symlinks=True) | |||||
self.directory[dest_fullpath] = from_disk.Directory.from_disk( | self.directory[dest_fullpath] = from_disk.Directory.from_disk( | ||||
path=fullpath | path=fullpath | ||||
) | ) | ||||
for root, dirs, files in os.walk(fullpath): | for root, dirs, files in os.walk(fullpath): | ||||
self.editor.external_paths.update( | self.editor.external_paths.update( | ||||
[ | [ | ||||
os.path.join(root.replace(self.rootpath + b"/", b""), p) | os.path.join(root.replace(self.rootpath + b"/", b""), p) | ||||
for p in chain(dirs, files) | for p in chain(dirs, files) | ||||
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | """Editor in charge of replaying svn events and computing objects | ||||
along. | along. | ||||
This implementation accounts for empty folder during hash | This implementation accounts for empty folder during hash | ||||
computations. | computations. | ||||
""" | """ | ||||
def __init__( | def __init__( | ||||
self, rootpath: bytes, directory: from_disk.Directory, svnrepo: SvnRepo | self, | ||||
rootpath: bytes, | |||||
directory: from_disk.Directory, | |||||
svnrepo: SvnRepo, | |||||
temp_dir: str, | |||||
): | ): | ||||
self.rootpath = rootpath | self.rootpath = rootpath | ||||
self.directory = directory | self.directory = directory | ||||
self.file_states: Dict[bytes, FileState] = defaultdict(FileState) | self.file_states: Dict[bytes, FileState] = defaultdict(FileState) | ||||
self.dir_states: Dict[bytes, DirState] = defaultdict(DirState) | self.dir_states: Dict[bytes, DirState] = defaultdict(DirState) | ||||
self.external_paths: Set[bytes] = set() | self.external_paths: Set[bytes] = set() | ||||
self.valid_externals: Dict[bytes, Tuple[str, bool]] = {} | self.valid_externals: Dict[bytes, Tuple[str, bool]] = {} | ||||
self.dead_externals: Set[str] = set() | self.dead_externals: Set[str] = set() | ||||
self.externals_cache_dir = tempfile.mkdtemp(dir=temp_dir) | |||||
self.externals_cache: Dict[Tuple[str, Optional[int]], str] = {} | |||||
self.svnrepo = svnrepo | self.svnrepo = svnrepo | ||||
self.revnum = None | self.revnum = None | ||||
def set_target_revision(self, revnum) -> None: | def set_target_revision(self, revnum) -> None: | ||||
self.revnum = revnum | self.revnum = revnum | ||||
def abort(self) -> None: | def abort(self) -> None: | ||||
pass | pass | ||||
Show All 16 Lines | class Replay: | ||||
"""Replay class. | """Replay class. | ||||
""" | """ | ||||
def __init__( | def __init__( | ||||
self, | self, | ||||
conn: RemoteAccess, | conn: RemoteAccess, | ||||
rootpath: bytes, | rootpath: bytes, | ||||
svnrepo: SvnRepo, | svnrepo: SvnRepo, | ||||
temp_dir: str, | |||||
directory: Optional[from_disk.Directory] = None, | directory: Optional[from_disk.Directory] = None, | ||||
): | ): | ||||
self.conn = conn | self.conn = conn | ||||
self.rootpath = rootpath | self.rootpath = rootpath | ||||
if directory is None: | if directory is None: | ||||
directory = from_disk.Directory() | directory = from_disk.Directory() | ||||
self.directory = directory | self.directory = directory | ||||
self.editor = Editor(rootpath=rootpath, directory=directory, svnrepo=svnrepo) | self.editor = Editor( | ||||
rootpath=rootpath, directory=directory, svnrepo=svnrepo, temp_dir=temp_dir | |||||
) | |||||
def replay(self, rev: int) -> from_disk.Directory: | def replay(self, rev: int) -> from_disk.Directory: | ||||
"""Replay svn actions between rev and rev+1. | """Replay svn actions between rev and rev+1. | ||||
This method updates in place the self.editor.directory, as well as the | This method updates in place the self.editor.directory, as well as the | ||||
filesystem. | filesystem. | ||||
Returns: | Returns: | ||||
▲ Show 20 Lines • Show All 101 Lines • Show Last 20 Lines |
Does the loader ever mutate files? If not, you can use `os.link` instead of `shutil.copy`, and pass `copy_function=os.link` to `shutil.copytree`, to create hard links instead of actual copies, which should save some time and space.