Page Menu | Home | Software Heritage
Paste P1240

subversion loader svn:externals support (quick and dirty POC)
Active | Public

Authored by anlambert on Dec 8 2021, 1:42 PM.
diff --git a/swh/loader/svn/ra.py b/swh/loader/svn/ra.py
index 8a51c49..d88496f 100644
--- a/swh/loader/svn/ra.py
+++ b/swh/loader/svn/ra.py
@@ -11,6 +11,7 @@ from __future__ import annotations
import codecs
import dataclasses
+import logging
import os
import shutil
import tempfile
@@ -28,7 +29,8 @@ from typing import (
)
import click
-from subvertpy import delta, properties
+from subvertpy import SubversionException, delta, properties
+from subvertpy.client import Client
from subvertpy.ra import Auth, RemoteAccess, get_username_provider
from swh.model import from_disk, hashutil
@@ -39,6 +41,8 @@ if TYPE_CHECKING:
_eol_style = {"native": b"\n", "CRLF": b"\r\n", "LF": b"\n", "CR": b"\r"}
+logger = logging.getLogger(__name__)
+
def _normalize_line_endings(lines: bytes, eol_style: str = "native") -> bytes:
r"""Normalize line endings to unix (\\n), windows (\\r\\n) or mac (\\r).
@@ -196,6 +200,10 @@ class FileEditor:
self.svnrepo = svnrepo
def change_prop(self, key: str, value: str) -> None:
+ logger.debug(
+ f"Setting property '{key}' with value '{value}' "
+ f"on path '{self.path}' detected."
+ )
if key == properties.PROP_EXECUTABLE:
if value is None: # bit flip off
self.state.executable = NOEXEC_FLAG
@@ -332,41 +340,31 @@ class FileEditor:
self.directory[self.path] = from_disk.Content.from_file(path=self.fullpath)
-class BaseDirEditor:
- """Base class implementation of dir editor.
-
- see :class:`DirEditor` for an implementation that hashes every
- directory encountered.
-
- Instantiate a new class inheriting from this class and define the following
- functions::
-
- def update_checksum(self):
- # Compute the checksums at current state
-
- def open_directory(self, *args):
- # Update an existing folder.
+class DirEditor:
+ """Directory Editor in charge of updating directory hashes computation.
- def add_directory(self, *args):
- # Add a new one.
+ This implementation includes empty folder in the hash computation.
"""
- __slots__ = ["directory", "rootpath", "svnrepo"]
+ __slots__ = ["directory", "rootpath", "path", "file_states", "svnrepo", "externals"]
def __init__(
self,
directory: from_disk.Directory,
rootpath: bytes,
+ path: bytes,
file_states: Dict[bytes, FileState],
svnrepo: SvnRepo,
):
self.directory = directory
self.rootpath = rootpath
+ self.path = path
# build directory on init
os.makedirs(rootpath, exist_ok=True)
self.file_states = file_states
self.svnrepo = svnrepo
+ self.externals: List[Tuple[str, str]] = []
def remove_child(self, path: bytes) -> None:
"""Remove a path from the current objects.
@@ -400,14 +398,45 @@ class BaseDirEditor:
if state_path.startswith(fullpath + b"/"):
del self.file_states[state_path]
- def update_checksum(self):
- raise NotImplementedError("This should be implemented.")
+ def update_checksum(self) -> None:
+ """Update the root path self.path's checksums according to the
+ children's objects.
- def open_directory(self, *args):
- raise NotImplementedError("This should be implemented.")
+ This function is expected to be called when the folder has
+ been completely 'walked'.
+
+ """
+ pass
- def add_directory(self, path: str, *args):
- raise NotImplementedError("This should be implemented.")
+ def open_directory(self, path: str, *args) -> DirEditor:
+ """Updating existing directory.
+
+ """
+ return DirEditor(
+ self.directory,
+ self.rootpath,
+ os.fsencode(path),
+ self.file_states,
+ svnrepo=self.svnrepo,
+ )
+
+ def add_directory(self, path: str, *args) -> DirEditor:
+ """Adding a new directory.
+
+ """
+ path_bytes = os.fsencode(path)
+ try:
+ os.makedirs(os.path.join(self.rootpath, path_bytes))
+ self.directory[path_bytes] = from_disk.Directory()
+ except FileExistsError:
+ pass
+ return DirEditor(
+ self.directory,
+ self.rootpath,
+ path_bytes,
+ self.file_states,
+ svnrepo=self.svnrepo,
+ )
def open_file(self, *args) -> FileEditor:
"""Updating existing file.
@@ -445,7 +474,23 @@ class BaseDirEditor:
"""
if key == properties.PROP_EXTERNALS:
- raise ValueError("Property '%s' detected. Not implemented yet." % key)
+ logger.debug(
+ f"Setting property '{key}' with value '{value}' "
+ f"on path '{self.path}' detected."
+ )
+ for external_line in value.split("\n"):
+ external_line = external_line.rstrip("\r")
+ if not external_line:
+ continue
+ path = None
+ remote_url = None
+ # revision = None
+ for external_token in " ".join(external_line.split()).split(" "):
+ if external_token.startswith(("http://", "https://", "svn://")):
+ remote_url = external_token
+ else:
+ path = external_token
+ self.externals.append((remote_url, path))
def delete_entry(self, path: str, revision: int) -> None:
"""Remove a path.
@@ -459,42 +504,35 @@ class BaseDirEditor:
"""Function called when we finish walking a repository.
"""
+ for remote_url, path in self.externals:
+ logger.debug("%s %s", path, remote_url)
+ try:
+ auth = Auth([get_username_provider()])
+ client = Client(auth=auth)
+ fullpath = os.path.join(self.rootpath, self.path, path.encode())
+ client.export(
+ remote_url.encode(),
+ to=fullpath,
+ ignore_keywords=True,
+ overwrite=True,
+ )
+ dest_path = path.encode()
+ dest_path_split = dest_path.split(b"/")
+ current_path = self.path
+ for subpath in dest_path_split[:-1]:
+ current_path = os.path.join(current_path, subpath)
+ if current_path not in self.directory:
+ self.directory[current_path] = from_disk.Directory()
+ self.directory[
+ os.path.join(self.path, dest_path)
+ ] = from_disk.Directory.from_disk(path=fullpath)
+ except SubversionException as se:
+ # external no longer available
+ logger.debug(se)
+ pass
self.update_checksum()
-class DirEditor(BaseDirEditor):
- """Directory Editor in charge of updating directory hashes computation.
-
- This implementation includes empty folder in the hash computation.
-
- """
-
- def update_checksum(self) -> None:
- """Update the root path self.path's checksums according to the
- children's objects.
-
- This function is expected to be called when the folder has
- been completely 'walked'.
-
- """
- pass
-
- def open_directory(self, *args) -> DirEditor:
- """Updating existing directory.
-
- """
- return self
-
- def add_directory(self, path: str, *args) -> DirEditor:
- """Adding a new directory.
-
- """
- path_bytes = os.fsencode(path)
- os.makedirs(os.path.join(self.rootpath, path_bytes), exist_ok=True)
- self.directory[path_bytes] = from_disk.Directory()
- return self
-
-
class Editor:
"""Editor in charge of replaying svn events and computing objects
along.
@@ -526,6 +564,7 @@ class Editor:
return DirEditor(
self.directory,
rootpath=self.rootpath,
+ path=b"",
file_states=self.file_states,
svnrepo=self.svnrepo,
)

Event Timeline

anlambert changed the title of this paste from subversion loader sv:exernals support (quick and dirty POC) to subversion loader svn:exernals support (quick and dirty POC).