Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123310
D5075.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
30 KB
Subscribers
None
D5075.diff
View Options
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -8,11 +8,14 @@
[mypy-celery.*]
ignore_missing_imports = True
-[mypy-subvertpy.*]
+[mypy-iso8601.*]
ignore_missing_imports = True
[mypy-pytest.*]
ignore_missing_imports = True
+[mypy-subvertpy.*]
+ignore_missing_imports = True
+
[mypy-swh.loader.*]
ignore_missing_imports = True
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,4 @@
swh.storage >= 0.11.3
swh.model >= 0.4.0
swh.scheduler >= 0.0.39
-swh.loader.core >= 0.17
+swh.loader.core >= 0.18
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@
click
python-dateutil
subvertpy >= 0.9.4
+iso8601
diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -14,11 +14,12 @@
import shutil
from subprocess import Popen
import tempfile
-from typing import Any, Dict, Iterator, List, Optional, Tuple
+from typing import Dict, Iterator, List, Optional, Tuple
+
+import iso8601
from subvertpy import SubversionException
-from swh.core.config import merge_configs
from swh.loader.core.loader import BaseLoader
from swh.loader.core.utils import clean_dangling_folders
from swh.loader.exception import NotFound
@@ -35,6 +36,7 @@
TargetType,
)
from swh.storage.algos.snapshot import snapshot_get_latest
+from swh.storage.interface import StorageInterface
from . import converters
from .exception import SvnLoaderHistoryAltered, SvnLoaderUneventful
@@ -50,16 +52,6 @@
TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.svn."
-DEFAULT_CONFIG: Dict[str, Any] = {
- "temp_directory": "/tmp",
- "debug": False, # NOT FOR PRODUCTION: False for production
- "check_revision": {
- "status": False, # True: check the revision, False: don't check
- "limit": 1000, # Periodicity check
- },
-}
-
-
class SvnLoader(BaseLoader):
"""Swh svn loader.
@@ -72,43 +64,49 @@
def __init__(
self,
- url,
- origin_url=None,
- visit_date=None,
- destination_path=None,
- swh_revision=None,
- start_from_scratch=False,
+ storage: StorageInterface,
+ url: str,
+ origin_url: Optional[str] = None,
+ visit_date: Optional[str] = None,
+ destination_path: Optional[str] = None,
+ swh_revision: Optional[str] = None,
+ start_from_scratch: bool = False,
+ temp_directory: str = "/tmp",
+ debug: bool = False,
+ check_revision: int = 0,
+ max_content_size: Optional[int] = None,
):
- super().__init__(logging_class="swh.loader.svn.SvnLoader")
- self.config = merge_configs(DEFAULT_CONFIG, self.config)
+ super().__init__(
+ storage=storage,
+ logging_class="swh.loader.svn.SvnLoader",
+ max_content_size=max_content_size,
+ )
# technical svn uri to act on svn repository
self.svn_url = url
# origin url as unique identifier for origin in swh archive
self.origin_url = origin_url if origin_url else self.svn_url
- self.debug = self.config["debug"]
- self.temp_directory = self.config["temp_directory"]
+ self.debug = debug
+ self.temp_directory = temp_directory
self.done = False
self.svnrepo = None
# Revision check is configurable
- check_revision = self.config["check_revision"]
- if check_revision["status"]:
- self.check_revision = check_revision["limit"]
- else:
- self.check_revision = None
+ self.check_revision = check_revision
# internal state used to store swh objects
- self._contents = []
- self._skipped_contents = []
- self._directories = []
- self._revisions = []
+ self._contents: List[Content] = []
+ self._skipped_contents: List[SkippedContent] = []
+ self._directories: List[Directory] = []
+ self._revisions: List[Revision] = []
self._snapshot: Optional[Snapshot] = None
# internal state, current visit
self._last_revision = None
self._visit_status = "full"
self._load_status = "uneventful"
- self.visit_date = visit_date
+ if visit_date:
+ self.visit_date = iso8601.parse_date(visit_date)
+ else:
+ self.visit_date = None
self.destination_path = destination_path
self.start_from_scratch = start_from_scratch
- self.max_content_length = self.config["max_content_size"]
self.snapshot = None
# state from previous visit
self.latest_snapshot = None
@@ -248,6 +246,7 @@
SvnLoaderUneventful: Nothing changed since last visit
"""
+ assert self.svnrepo is not None, "svnrepo initialized in the `prepare` method"
revision_head = self.svnrepo.head_revision()
if revision_head == 0: # empty repository case
revision_start = 0
@@ -317,7 +316,8 @@
ValueError if a hash divergence is detected
"""
- if (count % self.check_revision) == 0: # hash computation check
+ # hash computation check
+        if self.check_revision != 0 and count % self.check_revision == 0:
self.log.debug("Checking hash computations on revision %s..." % rev)
checked_dir_id = self.swh_revision_hash_tree_at_svn_revision(rev)
if checked_dir_id != dir_id:
@@ -385,10 +385,10 @@
yield _contents, _skipped_contents, _directories, swh_revision
- def prepare_origin_visit(self, *args, **kwargs):
+ def prepare_origin_visit(self):
self.origin = Origin(url=self.origin_url if self.origin_url else self.svn_url)
- def prepare(self, *args, **kwargs):
+ def prepare(self):
latest_snapshot_revision = self._latest_snapshot_revision(self.origin_url)
if latest_snapshot_revision:
self.latest_snapshot, self.latest_revision = latest_snapshot_revision
@@ -404,7 +404,7 @@
try:
self.svnrepo = SvnRepo(
- self.svn_url, self.origin_url, local_dirname, self.max_content_length
+ self.svn_url, self.origin_url, local_dirname, self.max_content_size
)
except SubversionException as e:
error_msgs = [
@@ -549,27 +549,37 @@
def __init__(
self,
- url,
- archive_path,
- origin_url=None,
- destination_path=None,
- swh_revision=None,
- start_from_scratch=None,
- visit_date=None,
+ storage: StorageInterface,
+ url: str,
+ archive_path: str,
+ origin_url: Optional[str] = None,
+ destination_path: Optional[str] = None,
+ swh_revision: Optional[str] = None,
+ start_from_scratch: bool = False,
+ visit_date: Optional[str] = None,
+ temp_directory: str = "/tmp",
+ debug: bool = False,
+ check_revision: int = 0,
+ max_content_size: Optional[int] = None,
):
super().__init__(
- url,
+ storage=storage,
+ url=url,
origin_url=origin_url,
destination_path=destination_path,
swh_revision=swh_revision,
start_from_scratch=start_from_scratch,
visit_date=visit_date,
+ temp_directory=temp_directory,
+ debug=debug,
+ check_revision=check_revision,
+ max_content_size=max_content_size,
)
self.archive_path = archive_path
self.temp_dir = None
self.repo_path = None
- def prepare(self, *args, **kwargs):
+ def prepare(self):
self.log.info("Archive to mount and load %s" % self.archive_path)
self.temp_dir, self.repo_path = init_svn_repo_from_archive_dump(
self.archive_path,
@@ -577,7 +587,7 @@
suffix="-%s" % os.getpid(),
root_dir=self.temp_directory,
)
- super().prepare(*args, **kwargs)
+ super().prepare()
def cleanup(self):
super().cleanup()
@@ -599,20 +609,30 @@
def __init__(
self,
- url,
- origin_url=None,
- destination_path=None,
- swh_revision=None,
- start_from_scratch=False,
- visit_date=None,
+ storage: StorageInterface,
+ url: str,
+ origin_url: Optional[str] = None,
+ destination_path: Optional[str] = None,
+ swh_revision: Optional[str] = None,
+ start_from_scratch: bool = False,
+ visit_date: Optional[str] = None,
+ temp_directory: str = "/tmp",
+ debug: bool = False,
+ check_revision: int = 0,
+ max_content_size: Optional[int] = None,
):
super().__init__(
- url,
+ storage=storage,
+ url=url,
origin_url=origin_url,
destination_path=destination_path,
swh_revision=swh_revision,
start_from_scratch=start_from_scratch,
visit_date=visit_date,
+ temp_directory=temp_directory,
+ debug=debug,
+ check_revision=check_revision,
+ max_content_size=max_content_size,
)
self.temp_dir = tempfile.mkdtemp(dir=self.temp_directory)
self.repo_path = None
@@ -730,7 +750,7 @@
"no exploitable dump file has been generated."
)
- def prepare(self, *args, **kwargs):
+ def prepare(self):
# First, check if previous revisions have been loaded for the
# subversion origin and get the number of the last one
last_loaded_svn_rev = self.get_last_loaded_svn_rev(self.svn_url)
@@ -748,7 +768,7 @@
root_dir=self.temp_dir,
)
self.svn_url = "file://%s" % self.repo_path
- super().prepare(*args, **kwargs)
+ super().prepare()
def cleanup(self):
super().cleanup()
diff --git a/swh/loader/svn/tasks.py b/swh/loader/svn/tasks.py
--- a/swh/loader/svn/tasks.py
+++ b/swh/loader/svn/tasks.py
@@ -1,8 +1,10 @@
-# Copyright (C) 2015-2019 The Software Heritage developers
+# Copyright (C) 2015-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Optional
+
from celery import shared_task
from .loader import SvnLoader, SvnLoaderFromDumpArchive, SvnLoaderFromRemoteDump
@@ -11,30 +13,31 @@
@shared_task(name=__name__ + ".LoadSvnRepository")
def load_svn(
*,
- url=None,
- origin_url=None,
- destination_path=None,
- swh_revision=None,
- visit_date=None,
- start_from_scratch=False,
+ url: Optional[str] = None,
+ origin_url: Optional[str] = None,
+ destination_path: Optional[str] = None,
+ swh_revision: Optional[str] = None,
+ visit_date: Optional[str] = None,
+ start_from_scratch: Optional[bool] = False,
):
"""Import a svn repository
Args:
- args: ordered arguments (expected None)
- kwargs: Dictionary with the following expected keys:
-
- - url (str): (mandatory) svn's repository url
- - origin_url (str): Optional original url override
- - destination_path (str): (optional) root directory to
- locally retrieve svn's data
- - swh_revision (dict): (optional) extra revision hex to
- start from. see swh.loader.svn.SvnLoader.process
- docstring
+ - url: (mandatory) svn's repository url to ingest data from
+ - origin_url: Optional original url override to use as origin reference
+ in the archive. If not provided, "url" is used as origin.
+ - destination_path: (optional) root directory to
+ locally retrieve svn's data
+ - swh_revision: (optional) extra revision hex to
+ start from. See swh.loader.svn.SvnLoader.process
+ docstring
+ - visit_date: Optional date to override the visit date
+ - start_from_scratch: Flag to allow starting back the svn repository from the
+ start
"""
- loader = SvnLoader(
- url,
+ loader = SvnLoader.from_configfile(
+ url=url,
origin_url=origin_url,
destination_path=destination_path,
swh_revision=swh_revision,
@@ -46,15 +49,26 @@
@shared_task(name=__name__ + ".MountAndLoadSvnRepository")
def load_svn_from_archive(
- *, url=None, archive_path=None, visit_date=None, start_from_scratch=False
+ *,
+ url: Optional[str] = None,
+ archive_path: Optional[str] = None,
+ visit_date: Optional[str] = None,
+ start_from_scratch: Optional[bool] = False,
):
"""1. Mount an svn dump from archive as a local svn repository
2. Load it through the svn loader
3. Clean up mounted svn repository archive
+ Args:
+ - url: origin url
+    - archive_path: Path on disk to the archive holding the svn repository to ingest
+ - visit_date: Optional date to override the visit date
+ - start_from_scratch: Flag to allow starting back the svn repository from the
+ start
+
"""
- loader = SvnLoaderFromDumpArchive(
- url,
+ loader = SvnLoaderFromDumpArchive.from_configfile(
+ url=url,
archive_path=archive_path,
visit_date=visit_date,
start_from_scratch=start_from_scratch,
@@ -64,15 +78,27 @@
@shared_task(name=__name__ + ".DumpMountAndLoadSvnRepository")
def load_svn_from_remote_dump(
- *, url=None, origin_url=None, visit_date=None, start_from_scratch=False
+ *,
+ url: Optional[str] = None,
+ origin_url: Optional[str] = None,
+ visit_date: Optional[str] = None,
+ start_from_scratch: Optional[bool] = False,
):
"""1. Mount a remote svn dump as a local svn repository.
2. Load it through the svn loader.
3. Clean up mounted svn repository archive.
+ Args:
+ - url: (mandatory) svn's repository url to ingest data from
+ - origin_url: Optional original url override to use as origin reference
+ in the archive. If not provided, "url" is used as origin.
+ - visit_date: Optional date to override the visit date
+ - start_from_scratch: Flag to allow starting back the svn repository from the
+ start
+
"""
- loader = SvnLoaderFromRemoteDump(
- url,
+ loader = SvnLoaderFromRemoteDump.from_configfile(
+ url=url,
origin_url=origin_url,
visit_date=visit_date,
start_from_scratch=start_from_scratch,
diff --git a/swh/loader/svn/tests/conftest.py b/swh/loader/svn/tests/conftest.py
--- a/swh/loader/svn/tests/conftest.py
+++ b/swh/loader/svn/tests/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -9,27 +9,31 @@
@pytest.fixture
-def swh_loader_config(swh_storage_backend_config) -> Dict[str, Any]:
- swh_storage_backend_config["journal_writer"] = {}
+def swh_storage_backend_config(swh_storage_backend_config):
+ """Basic pg storage configuration with no journal collaborator
+ (to avoid pulling optional dependency on clients of this fixture)
+
+ """
return {
+ "cls": "filter",
"storage": {
- "cls": "pipeline",
- "steps": [
- {"cls": "filter"},
- {
- "cls": "buffer",
- "min_batch_size": {
- "content": 10000,
- "content_bytes": 1073741824,
- "directory": 2500,
- "revision": 10,
- "release": 100,
- },
- },
- swh_storage_backend_config,
- ],
+ "cls": "buffer",
+ "min_batch_size": {
+ "content": 10000,
+ "content_bytes": 1073741824,
+ "directory": 2500,
+ "revision": 10,
+ "release": 100,
+ },
+ "storage": swh_storage_backend_config,
},
- "check_revision": {"limit": 100, "status": False},
- "log_db": "dbname=softwareheritage-log",
+ }
+
+
+@pytest.fixture
+def swh_loader_config(swh_storage_backend_config) -> Dict[str, Any]:
+ return {
+ "storage": swh_storage_backend_config,
+ "check_revision": 100,
"temp_directory": "/tmp",
}
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -39,33 +39,33 @@
)
-def test_loader_svn_not_found_no_mock(swh_config, tmp_path):
+def test_loader_svn_not_found_no_mock(swh_storage, tmp_path):
"""Given an unknown repository, the loader visit ends up in status not_found"""
unknown_repo_url = "unknown-repository"
- loader = SvnLoader(unknown_repo_url, destination_path=tmp_path)
+ loader = SvnLoader(swh_storage, unknown_repo_url, destination_path=tmp_path)
assert loader.load() == {"status": "uneventful"}
assert_last_visit_matches(
- loader.storage, unknown_repo_url, status="not_found", type="svn",
+ swh_storage, unknown_repo_url, status="not_found", type="svn",
)
@pytest.mark.parametrize(
"exception_msg", ["Unable to connect to a repository at URL", "Unknown URL type",]
)
-def test_loader_svn_not_found(swh_config, tmp_path, exception_msg, mocker):
+def test_loader_svn_not_found(swh_storage, tmp_path, exception_msg, mocker):
"""Given unknown repository issues, the loader visit ends up in status not_found"""
mock = mocker.patch("swh.loader.svn.loader.SvnRepo")
mock.side_effect = SubversionException(exception_msg, 0)
unknown_repo_url = "unknown-repository"
- loader = SvnLoader(unknown_repo_url, destination_path=tmp_path)
+ loader = SvnLoader(swh_storage, unknown_repo_url, destination_path=tmp_path)
assert loader.load() == {"status": "uneventful"}
assert_last_visit_matches(
- loader.storage, unknown_repo_url, status="not_found", type="svn",
+ swh_storage, unknown_repo_url, status="not_found", type="svn",
)
@@ -77,28 +77,28 @@
ValueError("considered a failure"),
],
)
-def test_loader_svn_failures(swh_config, tmp_path, exception, mocker):
+def test_loader_svn_failures(swh_storage, tmp_path, exception, mocker):
"""Given any errors raised, the loader visit ends up in status failed"""
mock = mocker.patch("swh.loader.svn.loader.SvnRepo")
mock.side_effect = exception
existing_repo_url = "existing-repo-url"
- loader = SvnLoader(existing_repo_url, destination_path=tmp_path)
+ loader = SvnLoader(swh_storage, existing_repo_url, destination_path=tmp_path)
assert loader.load() == {"status": "failed"}
assert_last_visit_matches(
- loader.storage, existing_repo_url, status="failed", type="svn",
+ swh_storage, existing_repo_url, status="failed", type="svn",
)
-def test_loader_svn_new_visit(swh_config, datadir, tmp_path):
+def test_loader_svn_new_visit(swh_storage, datadir, tmp_path):
"""Eventful visit should yield 1 snapshot"""
archive_name = "pkg-gourmet"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url, destination_path=tmp_path)
+ loader = SvnLoader(swh_storage, repo_url, destination_path=tmp_path)
assert loader.load() == {"status": "eventful"}
@@ -125,7 +125,7 @@
check_snapshot(GOURMET_SNAPSHOT, loader.storage)
-def test_loader_svn_2_visits_no_change(swh_config, datadir, tmp_path):
+def test_loader_svn_2_visits_no_change(swh_storage, datadir, tmp_path):
"""Visit multiple times a repository with no change should yield the same snapshot
"""
@@ -133,7 +133,7 @@
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
visit_status1 = assert_last_visit_matches(
@@ -166,7 +166,7 @@
)[0]
assert start_revision is not None
- loader = SvnLoader(repo_url, swh_revision=start_revision)
+ loader = SvnLoader(swh_storage, repo_url, swh_revision=start_revision)
assert loader.load() == {"status": "uneventful"}
stats = get_stats(loader.storage)
@@ -183,7 +183,7 @@
)
-def test_loader_tampered_repository(swh_config, datadir, tmp_path):
+def test_loader_tampered_repository(swh_storage, datadir, tmp_path):
"""In this scenario, the dump has been tampered with to modify the
commit log [1]. This results in a hash divergence which is
detected at startup after a new run for the same origin.
@@ -204,7 +204,7 @@
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
check_snapshot(GOURMET_SNAPSHOT, loader.storage)
@@ -213,7 +213,7 @@
archive_path2, archive_name, tmp_path
)
- loader2 = SvnLoader(repo_tampered_url, origin_url=repo_url)
+ loader2 = SvnLoader(swh_storage, repo_tampered_url, origin_url=repo_url)
assert loader2.load() == {"status": "failed"}
assert_last_visit_matches(
@@ -226,7 +226,7 @@
assert stats["snapshot"] == 1
-def test_loader_svn_visit_with_changes(swh_config, datadir, tmp_path):
+def test_loader_svn_visit_with_changes(swh_storage, datadir, tmp_path):
"""In this scenario, the repository has been updated with new changes.
The loading visit should result in new objects stored and 1 new
snapshot.
@@ -239,7 +239,7 @@
)
# repo_initial_url becomes the origin_url we want to visit some more below
- loader = SvnLoader(repo_initial_url)
+ loader = SvnLoader(swh_storage, repo_initial_url)
assert loader.load() == {"status": "eventful"}
visit_status1 = assert_last_visit_matches(
@@ -255,7 +255,7 @@
archive_path, "pkg-gourmet", tmp_path
)
- loader = SvnLoader(repo_updated_url, origin_url=repo_initial_url,)
+ loader = SvnLoader(swh_storage, repo_updated_url, origin_url=repo_initial_url,)
assert loader.load() == {"status": "eventful"}
visit_status2 = assert_last_visit_matches(
@@ -286,7 +286,10 @@
# Start from scratch loading yields the same result
loader = SvnLoader(
- repo_updated_url, origin_url=repo_initial_url, start_from_scratch=True
+ swh_storage,
+ repo_updated_url,
+ origin_url=repo_initial_url,
+ start_from_scratch=True,
)
assert loader.load() == {"status": "eventful"}
visit_status3 = assert_last_visit_matches(
@@ -306,7 +309,7 @@
assert stats["snapshot"] == 2 # no new snapshot
-def test_loader_svn_visit_start_from_revision(swh_config, datadir, tmp_path):
+def test_loader_svn_visit_start_from_revision(swh_storage, datadir, tmp_path):
"""Starting from existing revision, next visit on changed repo should yield 1 new
snapshot.
@@ -318,7 +321,7 @@
)
# repo_initial_url becomes the origin_url we want to visit some more below
- loader = SvnLoader(repo_initial_url)
+ loader = SvnLoader(swh_storage, repo_initial_url)
assert loader.load() == {"status": "eventful"}
visit_status1 = assert_last_visit_matches(
@@ -341,7 +344,10 @@
# we'll start from start_revision
loader = SvnLoader(
- repo_updated_url, origin_url=repo_initial_url, swh_revision=start_revision
+ swh_storage,
+ repo_updated_url,
+ origin_url=repo_initial_url,
+ swh_revision=start_revision,
)
assert loader.load() == {"status": "eventful"}
@@ -373,7 +379,7 @@
check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage)
-def test_loader_svn_visit_with_eol_style(swh_config, datadir, tmp_path):
+def test_loader_svn_visit_with_eol_style(swh_storage, datadir, tmp_path):
"""Check that a svn repo containing a versioned file with CRLF line
endings with svn:eol-style property set to 'native' (this is a
violation of svn specification as the file should have been
@@ -384,7 +390,7 @@
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
mediawiki_snapshot = Snapshot(
@@ -412,7 +418,7 @@
assert stats["snapshot"] == 1
-def test_loader_svn_visit_with_mixed_crlf_lf(swh_config, datadir, tmp_path):
+def test_loader_svn_visit_with_mixed_crlf_lf(swh_storage, datadir, tmp_path):
"""Check that a svn repo containing a versioned file with mixed
CRLF/LF line endings with svn:eol-style property set to 'native'
(this is a violation of svn specification as mixed line endings
@@ -424,7 +430,7 @@
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
pyang_snapshot = Snapshot(
@@ -448,7 +454,7 @@
assert stats["snapshot"] == 1
-def test_loader_svn_with_external_properties(swh_config, datadir, tmp_path):
+def test_loader_svn_with_external_properties(swh_storage, datadir, tmp_path):
"""Repository with svn:external properties cannot be fully ingested yet
"""
@@ -456,7 +462,7 @@
archive_path = os.path.join(datadir, "pkg-gourmet-with-external-id.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
gourmet_externals_snapshot = Snapshot(
@@ -487,7 +493,7 @@
assert stats["revision"] == 21 - 1 # commit with the svn:external property
-def test_loader_svn_with_symlink(swh_config, datadir, tmp_path):
+def test_loader_svn_with_symlink(swh_storage, datadir, tmp_path):
"""Repository with symlinks should be ingested ok
Edge case:
@@ -502,7 +508,7 @@
)
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
gourmet_edge_cases_snapshot = Snapshot(
@@ -531,7 +537,7 @@
assert stats["revision"] == 19
-def test_loader_svn_with_wrong_symlinks(swh_config, datadir, tmp_path):
+def test_loader_svn_with_wrong_symlinks(swh_storage, datadir, tmp_path):
"""Repository with wrong symlinks should be ingested ok nonetheless
Edge case:
@@ -543,7 +549,7 @@
archive_path = os.path.join(datadir, "pkg-gourmet-with-wrong-link-cases.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
gourmet_wrong_links_snapshot = Snapshot(
@@ -572,7 +578,7 @@
assert stats["revision"] == 21
-def test_loader_svn_loader_from_dump_archive(swh_config, datadir, tmp_path):
+def test_loader_svn_loader_from_dump_archive(swh_storage, datadir, tmp_path):
"""Repository with wrong symlinks should be ingested ok nonetheless
Edge case:
@@ -584,7 +590,7 @@
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loaderFromDump = SvnLoaderFromRemoteDump(repo_url)
+ loaderFromDump = SvnLoaderFromRemoteDump(swh_storage, repo_url)
assert loaderFromDump.load() == {"status": "eventful"}
assert_last_visit_matches(
loaderFromDump.storage,
@@ -595,7 +601,7 @@
)
origin_url = repo_url + "2" # rename to another origin
- loader = SvnLoader(repo_url, origin_url=origin_url)
+ loader = SvnLoader(swh_storage, repo_url, origin_url=origin_url)
assert loader.load() == {"status": "eventful"} # because are working on new origin
assert_last_visit_matches(
loader.storage,
@@ -612,7 +618,7 @@
assert stats["origin_visit"] == 2
assert stats["snapshot"] == 1
- loader = SvnLoader(repo_url) # no change on the origin-url
+ loader = SvnLoader(swh_storage, repo_url) # no change on the origin-url
assert loader.load() == {"status": "uneventful"}
assert_last_visit_matches(
loader.storage,
@@ -628,11 +634,11 @@
assert stats["snapshot"] == 1
# second visit from the dump should be uneventful
- loaderFromDump = SvnLoaderFromRemoteDump(repo_url)
+ loaderFromDump = SvnLoaderFromRemoteDump(swh_storage, repo_url)
assert loaderFromDump.load() == {"status": "uneventful"}
-def test_loader_user_defined_svn_properties(swh_config, datadir, tmp_path):
+def test_loader_user_defined_svn_properties(swh_storage, datadir, tmp_path):
"""Edge cases: The repository held some user defined svn-properties with special
encodings, this prevented the repository from being loaded even though we do not
ingest those information.
@@ -642,7 +648,7 @@
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
expected_snapshot = Snapshot(
@@ -671,13 +677,13 @@
assert stats["revision"] == 7
-def test_loader_svn_dir_added_then_removed(swh_config, datadir, tmp_path):
+def test_loader_svn_dir_added_then_removed(swh_storage, datadir, tmp_path):
"""Loader should handle directory removal when processing a commit"""
archive_name = "pkg-gourmet"
archive_path = os.path.join(datadir, f"{archive_name}-add-remove-dir.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url, destination_path=tmp_path)
+ loader = SvnLoader(swh_storage, repo_url, destination_path=tmp_path)
assert loader.load() == {"status": "eventful"}
assert loader.visit_status() == "full"
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 18, 4:23 PM (4 h, 33 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224777
Attached To
D5075: Rework loader instantiation logic according to loader core api
Event Timeline
Log In to Comment