Page MenuHomeSoftware Heritage

D5075.diff
No OneTemporary

D5075.diff

diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -8,11 +8,14 @@
[mypy-celery.*]
ignore_missing_imports = True
-[mypy-subvertpy.*]
+[mypy-iso8601.*]
ignore_missing_imports = True
[mypy-pytest.*]
ignore_missing_imports = True
+[mypy-subvertpy.*]
+ignore_missing_imports = True
+
[mypy-swh.loader.*]
ignore_missing_imports = True
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,4 @@
swh.storage >= 0.11.3
swh.model >= 0.4.0
swh.scheduler >= 0.0.39
-swh.loader.core >= 0.17
+swh.loader.core >= 0.18
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@
click
python-dateutil
subvertpy >= 0.9.4
+iso8601
diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -14,11 +14,12 @@
import shutil
from subprocess import Popen
import tempfile
-from typing import Any, Dict, Iterator, List, Optional, Tuple
+from typing import Dict, Iterator, List, Optional, Tuple
+
+import iso8601
from subvertpy import SubversionException
-from swh.core.config import merge_configs
from swh.loader.core.loader import BaseLoader
from swh.loader.core.utils import clean_dangling_folders
from swh.loader.exception import NotFound
@@ -35,6 +36,7 @@
TargetType,
)
from swh.storage.algos.snapshot import snapshot_get_latest
+from swh.storage.interface import StorageInterface
from . import converters
from .exception import SvnLoaderHistoryAltered, SvnLoaderUneventful
@@ -50,16 +52,6 @@
TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.svn."
-DEFAULT_CONFIG: Dict[str, Any] = {
- "temp_directory": "/tmp",
- "debug": False, # NOT FOR PRODUCTION: False for production
- "check_revision": {
- "status": False, # True: check the revision, False: don't check
- "limit": 1000, # Periodicity check
- },
-}
-
-
class SvnLoader(BaseLoader):
"""Swh svn loader.
@@ -72,43 +64,49 @@
def __init__(
self,
- url,
- origin_url=None,
- visit_date=None,
- destination_path=None,
- swh_revision=None,
- start_from_scratch=False,
+ storage: StorageInterface,
+ url: str,
+ origin_url: Optional[str] = None,
+ visit_date: Optional[str] = None,
+ destination_path: Optional[str] = None,
+ swh_revision: Optional[str] = None,
+ start_from_scratch: bool = False,
+ temp_directory: str = "/tmp",
+ debug: bool = False,
+ check_revision: int = 0,
+ max_content_size: Optional[int] = None,
):
- super().__init__(logging_class="swh.loader.svn.SvnLoader")
- self.config = merge_configs(DEFAULT_CONFIG, self.config)
+ super().__init__(
+ storage=storage,
+ logging_class="swh.loader.svn.SvnLoader",
+ max_content_size=max_content_size,
+ )
# technical svn uri to act on svn repository
self.svn_url = url
# origin url as unique identifier for origin in swh archive
self.origin_url = origin_url if origin_url else self.svn_url
- self.debug = self.config["debug"]
- self.temp_directory = self.config["temp_directory"]
+ self.debug = debug
+ self.temp_directory = temp_directory
self.done = False
self.svnrepo = None
# Revision check is configurable
- check_revision = self.config["check_revision"]
- if check_revision["status"]:
- self.check_revision = check_revision["limit"]
- else:
- self.check_revision = None
+ self.check_revision = check_revision
# internal state used to store swh objects
- self._contents = []
- self._skipped_contents = []
- self._directories = []
- self._revisions = []
+ self._contents: List[Content] = []
+ self._skipped_contents: List[SkippedContent] = []
+ self._directories: List[Directory] = []
+ self._revisions: List[Revision] = []
self._snapshot: Optional[Snapshot] = None
# internal state, current visit
self._last_revision = None
self._visit_status = "full"
self._load_status = "uneventful"
- self.visit_date = visit_date
+ if visit_date:
+ self.visit_date = iso8601.parse_date(visit_date)
+ else:
+ self.visit_date = None
self.destination_path = destination_path
self.start_from_scratch = start_from_scratch
- self.max_content_length = self.config["max_content_size"]
self.snapshot = None
# state from previous visit
self.latest_snapshot = None
@@ -248,6 +246,7 @@
SvnLoaderUneventful: Nothing changed since last visit
"""
+ assert self.svnrepo is not None, "svnrepo initialized in the `prepare` method"
revision_head = self.svnrepo.head_revision()
if revision_head == 0: # empty repository case
revision_start = 0
@@ -317,7 +316,8 @@
ValueError if a hash divergence is detected
"""
- if (count % self.check_revision) == 0: # hash computation check
+ # hash computation check
+    if self.check_revision != 0 and count % self.check_revision == 0:
self.log.debug("Checking hash computations on revision %s..." % rev)
checked_dir_id = self.swh_revision_hash_tree_at_svn_revision(rev)
if checked_dir_id != dir_id:
@@ -385,10 +385,10 @@
yield _contents, _skipped_contents, _directories, swh_revision
- def prepare_origin_visit(self, *args, **kwargs):
+ def prepare_origin_visit(self):
self.origin = Origin(url=self.origin_url if self.origin_url else self.svn_url)
- def prepare(self, *args, **kwargs):
+ def prepare(self):
latest_snapshot_revision = self._latest_snapshot_revision(self.origin_url)
if latest_snapshot_revision:
self.latest_snapshot, self.latest_revision = latest_snapshot_revision
@@ -404,7 +404,7 @@
try:
self.svnrepo = SvnRepo(
- self.svn_url, self.origin_url, local_dirname, self.max_content_length
+ self.svn_url, self.origin_url, local_dirname, self.max_content_size
)
except SubversionException as e:
error_msgs = [
@@ -549,27 +549,37 @@
def __init__(
self,
- url,
- archive_path,
- origin_url=None,
- destination_path=None,
- swh_revision=None,
- start_from_scratch=None,
- visit_date=None,
+ storage: StorageInterface,
+ url: str,
+ archive_path: str,
+ origin_url: Optional[str] = None,
+ destination_path: Optional[str] = None,
+ swh_revision: Optional[str] = None,
+ start_from_scratch: bool = False,
+ visit_date: Optional[str] = None,
+ temp_directory: str = "/tmp",
+ debug: bool = False,
+ check_revision: int = 0,
+ max_content_size: Optional[int] = None,
):
super().__init__(
- url,
+ storage=storage,
+ url=url,
origin_url=origin_url,
destination_path=destination_path,
swh_revision=swh_revision,
start_from_scratch=start_from_scratch,
visit_date=visit_date,
+ temp_directory=temp_directory,
+ debug=debug,
+ check_revision=check_revision,
+ max_content_size=max_content_size,
)
self.archive_path = archive_path
self.temp_dir = None
self.repo_path = None
- def prepare(self, *args, **kwargs):
+ def prepare(self):
self.log.info("Archive to mount and load %s" % self.archive_path)
self.temp_dir, self.repo_path = init_svn_repo_from_archive_dump(
self.archive_path,
@@ -577,7 +587,7 @@
suffix="-%s" % os.getpid(),
root_dir=self.temp_directory,
)
- super().prepare(*args, **kwargs)
+ super().prepare()
def cleanup(self):
super().cleanup()
@@ -599,20 +609,30 @@
def __init__(
self,
- url,
- origin_url=None,
- destination_path=None,
- swh_revision=None,
- start_from_scratch=False,
- visit_date=None,
+ storage: StorageInterface,
+ url: str,
+ origin_url: Optional[str] = None,
+ destination_path: Optional[str] = None,
+ swh_revision: Optional[str] = None,
+ start_from_scratch: bool = False,
+ visit_date: Optional[str] = None,
+ temp_directory: str = "/tmp",
+ debug: bool = False,
+ check_revision: int = 0,
+ max_content_size: Optional[int] = None,
):
super().__init__(
- url,
+ storage=storage,
+ url=url,
origin_url=origin_url,
destination_path=destination_path,
swh_revision=swh_revision,
start_from_scratch=start_from_scratch,
visit_date=visit_date,
+ temp_directory=temp_directory,
+ debug=debug,
+ check_revision=check_revision,
+ max_content_size=max_content_size,
)
self.temp_dir = tempfile.mkdtemp(dir=self.temp_directory)
self.repo_path = None
@@ -730,7 +750,7 @@
"no exploitable dump file has been generated."
)
- def prepare(self, *args, **kwargs):
+ def prepare(self):
# First, check if previous revisions have been loaded for the
# subversion origin and get the number of the last one
last_loaded_svn_rev = self.get_last_loaded_svn_rev(self.svn_url)
@@ -748,7 +768,7 @@
root_dir=self.temp_dir,
)
self.svn_url = "file://%s" % self.repo_path
- super().prepare(*args, **kwargs)
+ super().prepare()
def cleanup(self):
super().cleanup()
diff --git a/swh/loader/svn/tasks.py b/swh/loader/svn/tasks.py
--- a/swh/loader/svn/tasks.py
+++ b/swh/loader/svn/tasks.py
@@ -1,8 +1,10 @@
-# Copyright (C) 2015-2019 The Software Heritage developers
+# Copyright (C) 2015-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Optional
+
from celery import shared_task
from .loader import SvnLoader, SvnLoaderFromDumpArchive, SvnLoaderFromRemoteDump
@@ -11,30 +13,31 @@
@shared_task(name=__name__ + ".LoadSvnRepository")
def load_svn(
*,
- url=None,
- origin_url=None,
- destination_path=None,
- swh_revision=None,
- visit_date=None,
- start_from_scratch=False,
+ url: Optional[str] = None,
+ origin_url: Optional[str] = None,
+ destination_path: Optional[str] = None,
+ swh_revision: Optional[str] = None,
+ visit_date: Optional[str] = None,
+ start_from_scratch: Optional[bool] = False,
):
"""Import a svn repository
Args:
- args: ordered arguments (expected None)
- kwargs: Dictionary with the following expected keys:
-
- - url (str): (mandatory) svn's repository url
- - origin_url (str): Optional original url override
- - destination_path (str): (optional) root directory to
- locally retrieve svn's data
- - swh_revision (dict): (optional) extra revision hex to
- start from. see swh.loader.svn.SvnLoader.process
- docstring
+ - url: (mandatory) svn's repository url to ingest data from
+ - origin_url: Optional original url override to use as origin reference
+ in the archive. If not provided, "url" is used as origin.
+ - destination_path: (optional) root directory to
+ locally retrieve svn's data
+ - swh_revision: (optional) extra revision hex to
+ start from. See swh.loader.svn.SvnLoader.process
+ docstring
+ - visit_date: Optional date to override the visit date
+ - start_from_scratch: Flag to allow starting back the svn repository from the
+ start
"""
- loader = SvnLoader(
- url,
+ loader = SvnLoader.from_configfile(
+ url=url,
origin_url=origin_url,
destination_path=destination_path,
swh_revision=swh_revision,
@@ -46,15 +49,26 @@
@shared_task(name=__name__ + ".MountAndLoadSvnRepository")
def load_svn_from_archive(
- *, url=None, archive_path=None, visit_date=None, start_from_scratch=False
+ *,
+ url: Optional[str] = None,
+ archive_path: Optional[str] = None,
+ visit_date: Optional[str] = None,
+ start_from_scratch: Optional[bool] = False,
):
"""1. Mount an svn dump from archive as a local svn repository
2. Load it through the svn loader
3. Clean up mounted svn repository archive
+ Args:
+ - url: origin url
+    - archive_path: Path on disk to the archive holding the svn repository to ingest
+ - visit_date: Optional date to override the visit date
+ - start_from_scratch: Flag to allow starting back the svn repository from the
+ start
+
"""
- loader = SvnLoaderFromDumpArchive(
- url,
+ loader = SvnLoaderFromDumpArchive.from_configfile(
+ url=url,
archive_path=archive_path,
visit_date=visit_date,
start_from_scratch=start_from_scratch,
@@ -64,15 +78,27 @@
@shared_task(name=__name__ + ".DumpMountAndLoadSvnRepository")
def load_svn_from_remote_dump(
- *, url=None, origin_url=None, visit_date=None, start_from_scratch=False
+ *,
+ url: Optional[str] = None,
+ origin_url: Optional[str] = None,
+ visit_date: Optional[str] = None,
+ start_from_scratch: Optional[bool] = False,
):
"""1. Mount a remote svn dump as a local svn repository.
2. Load it through the svn loader.
3. Clean up mounted svn repository archive.
+ Args:
+ - url: (mandatory) svn's repository url to ingest data from
+ - origin_url: Optional original url override to use as origin reference
+ in the archive. If not provided, "url" is used as origin.
+ - visit_date: Optional date to override the visit date
+ - start_from_scratch: Flag to allow starting back the svn repository from the
+ start
+
"""
- loader = SvnLoaderFromRemoteDump(
- url,
+ loader = SvnLoaderFromRemoteDump.from_configfile(
+ url=url,
origin_url=origin_url,
visit_date=visit_date,
start_from_scratch=start_from_scratch,
diff --git a/swh/loader/svn/tests/conftest.py b/swh/loader/svn/tests/conftest.py
--- a/swh/loader/svn/tests/conftest.py
+++ b/swh/loader/svn/tests/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -9,27 +9,31 @@
@pytest.fixture
-def swh_loader_config(swh_storage_backend_config) -> Dict[str, Any]:
- swh_storage_backend_config["journal_writer"] = {}
+def swh_storage_backend_config(swh_storage_backend_config):
+ """Basic pg storage configuration with no journal collaborator
+ (to avoid pulling optional dependency on clients of this fixture)
+
+ """
return {
+ "cls": "filter",
"storage": {
- "cls": "pipeline",
- "steps": [
- {"cls": "filter"},
- {
- "cls": "buffer",
- "min_batch_size": {
- "content": 10000,
- "content_bytes": 1073741824,
- "directory": 2500,
- "revision": 10,
- "release": 100,
- },
- },
- swh_storage_backend_config,
- ],
+ "cls": "buffer",
+ "min_batch_size": {
+ "content": 10000,
+ "content_bytes": 1073741824,
+ "directory": 2500,
+ "revision": 10,
+ "release": 100,
+ },
+ "storage": swh_storage_backend_config,
},
- "check_revision": {"limit": 100, "status": False},
- "log_db": "dbname=softwareheritage-log",
+ }
+
+
+@pytest.fixture
+def swh_loader_config(swh_storage_backend_config) -> Dict[str, Any]:
+ return {
+ "storage": swh_storage_backend_config,
+ "check_revision": 100,
"temp_directory": "/tmp",
}
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -39,33 +39,33 @@
)
-def test_loader_svn_not_found_no_mock(swh_config, tmp_path):
+def test_loader_svn_not_found_no_mock(swh_storage, tmp_path):
"""Given an unknown repository, the loader visit ends up in status not_found"""
unknown_repo_url = "unknown-repository"
- loader = SvnLoader(unknown_repo_url, destination_path=tmp_path)
+ loader = SvnLoader(swh_storage, unknown_repo_url, destination_path=tmp_path)
assert loader.load() == {"status": "uneventful"}
assert_last_visit_matches(
- loader.storage, unknown_repo_url, status="not_found", type="svn",
+ swh_storage, unknown_repo_url, status="not_found", type="svn",
)
@pytest.mark.parametrize(
"exception_msg", ["Unable to connect to a repository at URL", "Unknown URL type",]
)
-def test_loader_svn_not_found(swh_config, tmp_path, exception_msg, mocker):
+def test_loader_svn_not_found(swh_storage, tmp_path, exception_msg, mocker):
"""Given unknown repository issues, the loader visit ends up in status not_found"""
mock = mocker.patch("swh.loader.svn.loader.SvnRepo")
mock.side_effect = SubversionException(exception_msg, 0)
unknown_repo_url = "unknown-repository"
- loader = SvnLoader(unknown_repo_url, destination_path=tmp_path)
+ loader = SvnLoader(swh_storage, unknown_repo_url, destination_path=tmp_path)
assert loader.load() == {"status": "uneventful"}
assert_last_visit_matches(
- loader.storage, unknown_repo_url, status="not_found", type="svn",
+ swh_storage, unknown_repo_url, status="not_found", type="svn",
)
@@ -77,28 +77,28 @@
ValueError("considered a failure"),
],
)
-def test_loader_svn_failures(swh_config, tmp_path, exception, mocker):
+def test_loader_svn_failures(swh_storage, tmp_path, exception, mocker):
"""Given any errors raised, the loader visit ends up in status failed"""
mock = mocker.patch("swh.loader.svn.loader.SvnRepo")
mock.side_effect = exception
existing_repo_url = "existing-repo-url"
- loader = SvnLoader(existing_repo_url, destination_path=tmp_path)
+ loader = SvnLoader(swh_storage, existing_repo_url, destination_path=tmp_path)
assert loader.load() == {"status": "failed"}
assert_last_visit_matches(
- loader.storage, existing_repo_url, status="failed", type="svn",
+ swh_storage, existing_repo_url, status="failed", type="svn",
)
-def test_loader_svn_new_visit(swh_config, datadir, tmp_path):
+def test_loader_svn_new_visit(swh_storage, datadir, tmp_path):
"""Eventful visit should yield 1 snapshot"""
archive_name = "pkg-gourmet"
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url, destination_path=tmp_path)
+ loader = SvnLoader(swh_storage, repo_url, destination_path=tmp_path)
assert loader.load() == {"status": "eventful"}
@@ -125,7 +125,7 @@
check_snapshot(GOURMET_SNAPSHOT, loader.storage)
-def test_loader_svn_2_visits_no_change(swh_config, datadir, tmp_path):
+def test_loader_svn_2_visits_no_change(swh_storage, datadir, tmp_path):
"""Visit multiple times a repository with no change should yield the same snapshot
"""
@@ -133,7 +133,7 @@
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
visit_status1 = assert_last_visit_matches(
@@ -166,7 +166,7 @@
)[0]
assert start_revision is not None
- loader = SvnLoader(repo_url, swh_revision=start_revision)
+ loader = SvnLoader(swh_storage, repo_url, swh_revision=start_revision)
assert loader.load() == {"status": "uneventful"}
stats = get_stats(loader.storage)
@@ -183,7 +183,7 @@
)
-def test_loader_tampered_repository(swh_config, datadir, tmp_path):
+def test_loader_tampered_repository(swh_storage, datadir, tmp_path):
"""In this scenario, the dump has been tampered with to modify the
commit log [1]. This results in a hash divergence which is
detected at startup after a new run for the same origin.
@@ -204,7 +204,7 @@
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
check_snapshot(GOURMET_SNAPSHOT, loader.storage)
@@ -213,7 +213,7 @@
archive_path2, archive_name, tmp_path
)
- loader2 = SvnLoader(repo_tampered_url, origin_url=repo_url)
+ loader2 = SvnLoader(swh_storage, repo_tampered_url, origin_url=repo_url)
assert loader2.load() == {"status": "failed"}
assert_last_visit_matches(
@@ -226,7 +226,7 @@
assert stats["snapshot"] == 1
-def test_loader_svn_visit_with_changes(swh_config, datadir, tmp_path):
+def test_loader_svn_visit_with_changes(swh_storage, datadir, tmp_path):
"""In this scenario, the repository has been updated with new changes.
The loading visit should result in new objects stored and 1 new
snapshot.
@@ -239,7 +239,7 @@
)
# repo_initial_url becomes the origin_url we want to visit some more below
- loader = SvnLoader(repo_initial_url)
+ loader = SvnLoader(swh_storage, repo_initial_url)
assert loader.load() == {"status": "eventful"}
visit_status1 = assert_last_visit_matches(
@@ -255,7 +255,7 @@
archive_path, "pkg-gourmet", tmp_path
)
- loader = SvnLoader(repo_updated_url, origin_url=repo_initial_url,)
+ loader = SvnLoader(swh_storage, repo_updated_url, origin_url=repo_initial_url,)
assert loader.load() == {"status": "eventful"}
visit_status2 = assert_last_visit_matches(
@@ -286,7 +286,10 @@
# Start from scratch loading yields the same result
loader = SvnLoader(
- repo_updated_url, origin_url=repo_initial_url, start_from_scratch=True
+ swh_storage,
+ repo_updated_url,
+ origin_url=repo_initial_url,
+ start_from_scratch=True,
)
assert loader.load() == {"status": "eventful"}
visit_status3 = assert_last_visit_matches(
@@ -306,7 +309,7 @@
assert stats["snapshot"] == 2 # no new snapshot
-def test_loader_svn_visit_start_from_revision(swh_config, datadir, tmp_path):
+def test_loader_svn_visit_start_from_revision(swh_storage, datadir, tmp_path):
"""Starting from existing revision, next visit on changed repo should yield 1 new
snapshot.
@@ -318,7 +321,7 @@
)
# repo_initial_url becomes the origin_url we want to visit some more below
- loader = SvnLoader(repo_initial_url)
+ loader = SvnLoader(swh_storage, repo_initial_url)
assert loader.load() == {"status": "eventful"}
visit_status1 = assert_last_visit_matches(
@@ -341,7 +344,10 @@
# we'll start from start_revision
loader = SvnLoader(
- repo_updated_url, origin_url=repo_initial_url, swh_revision=start_revision
+ swh_storage,
+ repo_updated_url,
+ origin_url=repo_initial_url,
+ swh_revision=start_revision,
)
assert loader.load() == {"status": "eventful"}
@@ -373,7 +379,7 @@
check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage)
-def test_loader_svn_visit_with_eol_style(swh_config, datadir, tmp_path):
+def test_loader_svn_visit_with_eol_style(swh_storage, datadir, tmp_path):
"""Check that a svn repo containing a versioned file with CRLF line
endings with svn:eol-style property set to 'native' (this is a
violation of svn specification as the file should have been
@@ -384,7 +390,7 @@
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
mediawiki_snapshot = Snapshot(
@@ -412,7 +418,7 @@
assert stats["snapshot"] == 1
-def test_loader_svn_visit_with_mixed_crlf_lf(swh_config, datadir, tmp_path):
+def test_loader_svn_visit_with_mixed_crlf_lf(swh_storage, datadir, tmp_path):
"""Check that a svn repo containing a versioned file with mixed
CRLF/LF line endings with svn:eol-style property set to 'native'
(this is a violation of svn specification as mixed line endings
@@ -424,7 +430,7 @@
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
pyang_snapshot = Snapshot(
@@ -448,7 +454,7 @@
assert stats["snapshot"] == 1
-def test_loader_svn_with_external_properties(swh_config, datadir, tmp_path):
+def test_loader_svn_with_external_properties(swh_storage, datadir, tmp_path):
"""Repository with svn:external properties cannot be fully ingested yet
"""
@@ -456,7 +462,7 @@
archive_path = os.path.join(datadir, "pkg-gourmet-with-external-id.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
gourmet_externals_snapshot = Snapshot(
@@ -487,7 +493,7 @@
assert stats["revision"] == 21 - 1 # commit with the svn:external property
-def test_loader_svn_with_symlink(swh_config, datadir, tmp_path):
+def test_loader_svn_with_symlink(swh_storage, datadir, tmp_path):
"""Repository with symlinks should be ingested ok
Edge case:
@@ -502,7 +508,7 @@
)
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
gourmet_edge_cases_snapshot = Snapshot(
@@ -531,7 +537,7 @@
assert stats["revision"] == 19
-def test_loader_svn_with_wrong_symlinks(swh_config, datadir, tmp_path):
+def test_loader_svn_with_wrong_symlinks(swh_storage, datadir, tmp_path):
"""Repository with wrong symlinks should be ingested ok nonetheless
Edge case:
@@ -543,7 +549,7 @@
archive_path = os.path.join(datadir, "pkg-gourmet-with-wrong-link-cases.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
gourmet_wrong_links_snapshot = Snapshot(
@@ -572,7 +578,7 @@
assert stats["revision"] == 21
-def test_loader_svn_loader_from_dump_archive(swh_config, datadir, tmp_path):
+def test_loader_svn_loader_from_dump_archive(swh_storage, datadir, tmp_path):
"""Repository with wrong symlinks should be ingested ok nonetheless
Edge case:
@@ -584,7 +590,7 @@
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loaderFromDump = SvnLoaderFromRemoteDump(repo_url)
+ loaderFromDump = SvnLoaderFromRemoteDump(swh_storage, repo_url)
assert loaderFromDump.load() == {"status": "eventful"}
assert_last_visit_matches(
loaderFromDump.storage,
@@ -595,7 +601,7 @@
)
origin_url = repo_url + "2" # rename to another origin
- loader = SvnLoader(repo_url, origin_url=origin_url)
+ loader = SvnLoader(swh_storage, repo_url, origin_url=origin_url)
assert loader.load() == {"status": "eventful"} # because are working on new origin
assert_last_visit_matches(
loader.storage,
@@ -612,7 +618,7 @@
assert stats["origin_visit"] == 2
assert stats["snapshot"] == 1
- loader = SvnLoader(repo_url) # no change on the origin-url
+ loader = SvnLoader(swh_storage, repo_url) # no change on the origin-url
assert loader.load() == {"status": "uneventful"}
assert_last_visit_matches(
loader.storage,
@@ -628,11 +634,11 @@
assert stats["snapshot"] == 1
# second visit from the dump should be uneventful
- loaderFromDump = SvnLoaderFromRemoteDump(repo_url)
+ loaderFromDump = SvnLoaderFromRemoteDump(swh_storage, repo_url)
assert loaderFromDump.load() == {"status": "uneventful"}
-def test_loader_user_defined_svn_properties(swh_config, datadir, tmp_path):
+def test_loader_user_defined_svn_properties(swh_storage, datadir, tmp_path):
"""Edge cases: The repository held some user defined svn-properties with special
encodings, this prevented the repository from being loaded even though we do not
ingest those information.
@@ -642,7 +648,7 @@
archive_path = os.path.join(datadir, f"{archive_name}.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url)
+ loader = SvnLoader(swh_storage, repo_url)
assert loader.load() == {"status": "eventful"}
expected_snapshot = Snapshot(
@@ -671,13 +677,13 @@
assert stats["revision"] == 7
-def test_loader_svn_dir_added_then_removed(swh_config, datadir, tmp_path):
+def test_loader_svn_dir_added_then_removed(swh_storage, datadir, tmp_path):
"""Loader should handle directory removal when processing a commit"""
archive_name = "pkg-gourmet"
archive_path = os.path.join(datadir, f"{archive_name}-add-remove-dir.tgz")
repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
- loader = SvnLoader(repo_url, destination_path=tmp_path)
+ loader = SvnLoader(swh_storage, repo_url, destination_path=tmp_path)
assert loader.load() == {"status": "eventful"}
assert loader.visit_status() == "full"

File Metadata

Mime Type
text/plain
Expires
Wed, Dec 18, 4:23 PM (4 h, 33 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224777

Event Timeline