diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -8,11 +8,14 @@
 [mypy-celery.*]
 ignore_missing_imports = True
 
-[mypy-subvertpy.*]
+[mypy-iso8601.*]
 ignore_missing_imports = True
 
 [mypy-pytest.*]
 ignore_missing_imports = True
 
+[mypy-subvertpy.*]
+ignore_missing_imports = True
+
 [mypy-swh.loader.*]
 ignore_missing_imports = True
 
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@
 click
 python-dateutil
 subvertpy >= 0.9.4
+iso8601
diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -14,11 +14,12 @@
 import shutil
 from subprocess import Popen
 import tempfile
-from typing import Any, Dict, Iterator, List, Optional, Tuple
+from typing import Dict, Iterator, List, Optional, Tuple
+
+import iso8601
 
 from subvertpy import SubversionException
 
-from swh.core.config import merge_configs
 from swh.loader.core.loader import BaseLoader
 from swh.loader.core.utils import clean_dangling_folders
 from swh.loader.exception import NotFound
@@ -35,6 +36,7 @@
     TargetType,
 )
 from swh.storage.algos.snapshot import snapshot_get_latest
+from swh.storage.interface import StorageInterface
 
 from . import converters
 from .exception import SvnLoaderHistoryAltered, SvnLoaderUneventful
@@ -50,16 +52,6 @@
 
 TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.svn."
 
-DEFAULT_CONFIG: Dict[str, Any] = {
-    "temp_directory": "/tmp",
-    "debug": False,  # NOT FOR PRODUCTION: False for production
-    "check_revision": {
-        "status": False,  # True: check the revision, False: don't check
-        "limit": 1000,  # Periodicity check
-    },
-}
-
-
 class SvnLoader(BaseLoader):
     """Swh svn loader.
 
@@ -72,43 +64,49 @@
 
     def __init__(
         self,
-        url,
-        origin_url=None,
-        visit_date=None,
-        destination_path=None,
-        swh_revision=None,
-        start_from_scratch=False,
+        storage: StorageInterface,
+        url: str,
+        origin_url: Optional[str] = None,
+        visit_date: Optional[str] = None,
+        destination_path: Optional[str] = None,
+        swh_revision: Optional[str] = None,
+        start_from_scratch: bool = False,
+        temp_directory: str = "/tmp",
+        debug: bool = False,
+        check_revision: Optional[int] = None,
+        max_content_size: Optional[int] = None,
    ):
-        super().__init__(logging_class="swh.loader.svn.SvnLoader")
-        self.config = merge_configs(DEFAULT_CONFIG, self.config)
+        super().__init__(
+            storage=storage,
+            logging_class="swh.loader.svn.SvnLoader",
+            max_content_size=max_content_size,
+        )
         # technical svn uri to act on svn repository
         self.svn_url = url
         # origin url as unique identifier for origin in swh archive
         self.origin_url = origin_url if origin_url else self.svn_url
-        self.debug = self.config["debug"]
-        self.temp_directory = self.config["temp_directory"]
+        self.debug = debug
+        self.temp_directory = temp_directory
         self.done = False
         self.svnrepo = None
         # Revision check is configurable
-        check_revision = self.config["check_revision"]
-        if check_revision["status"]:
-            self.check_revision = check_revision["limit"]
-        else:
-            self.check_revision = None
+        self.check_revision = None if not check_revision else check_revision
         # internal state used to store swh objects
-        self._contents = []
-        self._skipped_contents = []
-        self._directories = []
-        self._revisions = []
+        self._contents: List[Content] = []
+        self._skipped_contents: List[SkippedContent] = []
+        self._directories: List[Directory] = []
+        self._revisions: List[Revision] = []
         self._snapshot: Optional[Snapshot] = None
         # internal state, current visit
         self._last_revision = None
         self._visit_status = "full"
         self._load_status = "uneventful"
-        self.visit_date = visit_date
+        if visit_date:
+            self.visit_date = iso8601.parse_date(visit_date)
+        else:
+            self.visit_date = None
         self.destination_path = destination_path
         self.start_from_scratch = start_from_scratch
-        self.max_content_length = self.config["max_content_size"]
         self.snapshot = None
         # state from previous visit
         self.latest_snapshot = None
@@ -248,6 +246,7 @@
             SvnLoaderUneventful: Nothing changed since last visit
 
         """
+        assert self.svnrepo is not None, "svnrepo initialized in the `prepare` method"
         revision_head = self.svnrepo.head_revision()
         if revision_head == 0:  # empty repository case
             revision_start = 0
@@ -404,7 +403,7 @@
 
         try:
             self.svnrepo = SvnRepo(
-                self.svn_url, self.origin_url, local_dirname, self.max_content_length
+                self.svn_url, self.origin_url, local_dirname, self.max_content_size
             )
         except SubversionException as e:
             error_msgs = [
@@ -549,21 +548,31 @@
 
     def __init__(
         self,
-        url,
-        archive_path,
-        origin_url=None,
-        destination_path=None,
-        swh_revision=None,
-        start_from_scratch=None,
-        visit_date=None,
+        storage: StorageInterface,
+        url: str,
+        archive_path: str,
+        origin_url: Optional[str] = None,
+        destination_path: Optional[str] = None,
+        swh_revision: Optional[str] = None,
+        start_from_scratch: bool = False,
+        visit_date: Optional[str] = None,
+        temp_directory: str = "/tmp",
+        debug: bool = False,
+        check_revision: Optional[int] = None,
+        max_content_size: Optional[int] = None,
     ):
         super().__init__(
-            url,
+            storage=storage,
+            url=url,
             origin_url=origin_url,
             destination_path=destination_path,
             swh_revision=swh_revision,
             start_from_scratch=start_from_scratch,
             visit_date=visit_date,
+            temp_directory=temp_directory,
+            debug=debug,
+            check_revision=check_revision,
+            max_content_size=max_content_size,
         )
         self.archive_path = archive_path
         self.temp_dir = None
@@ -599,20 +608,30 @@
 
     def __init__(
         self,
-        url,
-        origin_url=None,
-        destination_path=None,
-        swh_revision=None,
-        start_from_scratch=False,
-        visit_date=None,
+        storage: StorageInterface,
+        url: str,
+        origin_url: Optional[str] = None,
+        destination_path: Optional[str] = None,
+        swh_revision: Optional[str] = None,
+        start_from_scratch: bool = False,
+        visit_date: Optional[str] = None,
+        temp_directory: str = "/tmp",
+        debug: bool = False,
+        check_revision: Optional[int] = None,
+        max_content_size: Optional[int] = None,
    ):
         super().__init__(
-            url,
+            storage=storage,
+            url=url,
             origin_url=origin_url,
             destination_path=destination_path,
             swh_revision=swh_revision,
             start_from_scratch=start_from_scratch,
             visit_date=visit_date,
+            temp_directory=temp_directory,
+            debug=debug,
+            check_revision=check_revision,
+            max_content_size=max_content_size,
         )
         self.temp_dir = tempfile.mkdtemp(dir=self.temp_directory)
         self.repo_path = None
diff --git a/swh/loader/svn/tasks.py b/swh/loader/svn/tasks.py
--- a/swh/loader/svn/tasks.py
+++ b/swh/loader/svn/tasks.py
@@ -1,8 +1,10 @@
-# Copyright (C) 2015-2019  The Software Heritage developers
+# Copyright (C) 2015-2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from typing import Optional
+
 from celery import shared_task
 
 from .loader import SvnLoader, SvnLoaderFromDumpArchive, SvnLoaderFromRemoteDump
@@ -11,30 +13,31 @@
 @shared_task(name=__name__ + ".LoadSvnRepository")
 def load_svn(
     *,
-    url=None,
-    origin_url=None,
-    destination_path=None,
-    swh_revision=None,
-    visit_date=None,
-    start_from_scratch=False,
+    url: Optional[str] = None,
+    origin_url: Optional[str] = None,
+    destination_path: Optional[str] = None,
+    swh_revision: Optional[str] = None,
+    visit_date: Optional[str] = None,
+    start_from_scratch: Optional[bool] = False,
 ):
     """Import a svn repository
 
     Args:
-        args: ordered arguments (expected None)
-        kwargs: Dictionary with the following expected keys:
-
-            - url (str): (mandatory) svn's repository url
-            - origin_url (str): Optional original url override
-            - destination_path (str): (optional) root directory to
-              locally retrieve svn's data
-            - swh_revision (dict): (optional) extra revision hex to
-              start from. see swh.loader.svn.SvnLoader.process
-              docstring
+        - url: (mandatory) svn's repository url to ingest data from
+        - origin_url: Optional original url override to use as origin reference
+            in the archive. If not provided, "url" is used as origin.
+        - destination_path: (optional) root directory to
+            locally retrieve svn's data
+        - swh_revision: (optional) extra revision hex to
+            start from. See swh.loader.svn.SvnLoader.process
+            docstring
+        - visit_date: Optional date to override the visit date
+        - start_from_scratch: Flag to allow restarting the loading of the svn
+            repository from scratch
 
     """
-    loader = SvnLoader(
-        url,
+    loader = SvnLoader.from_configfile(
+        url=url,
         origin_url=origin_url,
         destination_path=destination_path,
         swh_revision=swh_revision,
@@ -46,15 +49,26 @@
 
 @shared_task(name=__name__ + ".MountAndLoadSvnRepository")
 def load_svn_from_archive(
-    *, url=None, archive_path=None, visit_date=None, start_from_scratch=False
+    *,
+    url: Optional[str] = None,
+    archive_path: Optional[str] = None,
+    visit_date: Optional[str] = None,
+    start_from_scratch: Optional[bool] = False,
 ):
     """1. Mount an svn dump from archive as a local svn repository
        2. Load it through the svn loader
       3. Clean up mounted svn repository archive
 
+    Args:
+        - url: origin url
+        - archive_path: Path on disk to the archive holding the svn repository to ingest
+        - visit_date: Optional date to override the visit date
+        - start_from_scratch: Flag to allow restarting the loading of the svn
+            repository from scratch
+
     """
-    loader = SvnLoaderFromDumpArchive(
-        url,
+    loader = SvnLoaderFromDumpArchive.from_configfile(
+        url=url,
         archive_path=archive_path,
         visit_date=visit_date,
         start_from_scratch=start_from_scratch,
@@ -64,15 +78,27 @@
 
 @shared_task(name=__name__ + ".DumpMountAndLoadSvnRepository")
 def load_svn_from_remote_dump(
-    *, url=None, origin_url=None, visit_date=None, start_from_scratch=False
+    *,
+    url: Optional[str] = None,
+    origin_url: Optional[str] = None,
+    visit_date: Optional[str] = None,
+    start_from_scratch: Optional[bool] = False,
 ):
     """1. Mount a remote svn dump as a local svn repository.
        2. Load it through the svn loader.
       3. Clean up mounted svn repository archive.
 
+    Args:
+        - url: (mandatory) svn's repository url to ingest data from
+        - origin_url: Optional original url override to use as origin reference
+            in the archive. If not provided, "url" is used as origin.
+        - visit_date: Optional date to override the visit date
+        - start_from_scratch: Flag to allow restarting the loading of the svn
+            repository from scratch
+
     """
-    loader = SvnLoaderFromRemoteDump(
-        url,
+    loader = SvnLoaderFromRemoteDump.from_configfile(
+        url=url,
         origin_url=origin_url,
         visit_date=visit_date,
         start_from_scratch=start_from_scratch,
diff --git a/swh/loader/svn/tests/conftest.py b/swh/loader/svn/tests/conftest.py
--- a/swh/loader/svn/tests/conftest.py
+++ b/swh/loader/svn/tests/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020  The Software Heritage developers
+# Copyright (C) 2019-2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -9,27 +9,31 @@
 
 
 @pytest.fixture
-def swh_loader_config(swh_storage_backend_config) -> Dict[str, Any]:
-    swh_storage_backend_config["journal_writer"] = {}
+def swh_storage_backend_config(swh_storage_backend_config):
+    """Basic pg storage configuration with no journal collaborator
+    (to avoid pulling an optional dependency on clients of this fixture)
+
+    """
     return {
+        "cls": "filter",
         "storage": {
-            "cls": "pipeline",
-            "steps": [
-                {"cls": "filter"},
-                {
-                    "cls": "buffer",
-                    "min_batch_size": {
-                        "content": 10000,
-                        "content_bytes": 1073741824,
-                        "directory": 2500,
-                        "revision": 10,
-                        "release": 100,
-                    },
-                },
-                swh_storage_backend_config,
-            ],
+            "cls": "buffer",
+            "min_batch_size": {
+                "content": 10000,
+                "content_bytes": 1073741824,
+                "directory": 2500,
+                "revision": 10,
+                "release": 100,
+            },
+            "storage": swh_storage_backend_config,
         },
-        "check_revision": {"limit": 100, "status": False},
-        "log_db": "dbname=softwareheritage-log",
+    }
+
+
+@pytest.fixture
+def swh_loader_config(swh_storage_backend_config) -> Dict[str, Any]:
+    return {
+        "storage": swh_storage_backend_config,
+        "check_revision": 100,
         "temp_directory": "/tmp",
     }
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -39,33 +39,33 @@
 )
 
 
-def test_loader_svn_not_found_no_mock(swh_config, tmp_path):
+def test_loader_svn_not_found_no_mock(swh_storage, tmp_path):
     """Given an unknown repository, the loader visit ends up in status not_found"""
     unknown_repo_url = "unknown-repository"
-    loader = SvnLoader(unknown_repo_url, destination_path=tmp_path)
+    loader = SvnLoader(swh_storage, unknown_repo_url, destination_path=tmp_path)
 
     assert loader.load() == {"status": "uneventful"}
     assert_last_visit_matches(
-        loader.storage, unknown_repo_url, status="not_found", type="svn",
+        swh_storage, unknown_repo_url, status="not_found", type="svn",
     )
 
 
 @pytest.mark.parametrize(
     "exception_msg", ["Unable to connect to a repository at URL", "Unknown URL type",]
 )
-def test_loader_svn_not_found(swh_config, tmp_path, exception_msg, mocker):
+def test_loader_svn_not_found(swh_storage, tmp_path, exception_msg, mocker):
     """Given unknown repository issues, the loader visit ends up in status not_found"""
     mock = mocker.patch("swh.loader.svn.loader.SvnRepo")
     mock.side_effect = SubversionException(exception_msg, 0)
 
     unknown_repo_url = "unknown-repository"
-    loader = SvnLoader(unknown_repo_url, destination_path=tmp_path)
+    loader = SvnLoader(swh_storage, unknown_repo_url, destination_path=tmp_path)
 
     assert loader.load() == {"status": "uneventful"}
     assert_last_visit_matches(
-        loader.storage, unknown_repo_url, status="not_found", type="svn",
+        swh_storage, unknown_repo_url, status="not_found", type="svn",
     )
 
 
@@ -77,28 +77,28 @@
         ValueError("considered a failure"),
     ],
 )
-def test_loader_svn_failures(swh_config, tmp_path, exception, mocker):
+def test_loader_svn_failures(swh_storage, tmp_path, exception, mocker):
     """Given any errors raised, the loader visit ends up in status failed"""
     mock = mocker.patch("swh.loader.svn.loader.SvnRepo")
     mock.side_effect = exception
 
     existing_repo_url = "existing-repo-url"
-    loader = SvnLoader(existing_repo_url, destination_path=tmp_path)
+    loader = SvnLoader(swh_storage, existing_repo_url, destination_path=tmp_path)
 
     assert loader.load() == {"status": "failed"}
     assert_last_visit_matches(
-        loader.storage, existing_repo_url, status="failed", type="svn",
+        swh_storage, existing_repo_url, status="failed", type="svn",
     )
 
 
-def test_loader_svn_new_visit(swh_config, datadir, tmp_path):
+def test_loader_svn_new_visit(swh_storage, datadir, tmp_path):
     """Eventful visit should yield 1 snapshot"""
     archive_name = "pkg-gourmet"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
-    loader = SvnLoader(repo_url, destination_path=tmp_path)
+    loader = SvnLoader(swh_storage, repo_url, destination_path=tmp_path)
 
     assert loader.load() == {"status": "eventful"}
 
@@ -125,7 +125,7 @@
     check_snapshot(GOURMET_SNAPSHOT, loader.storage)
 
 
-def test_loader_svn_2_visits_no_change(swh_config, datadir, tmp_path):
+def test_loader_svn_2_visits_no_change(swh_storage, datadir, tmp_path):
     """Visit multiple times a repository with no change should yield the same snapshot
 
     """
@@ -133,7 +133,7 @@
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
-    loader = SvnLoader(repo_url)
+    loader = SvnLoader(swh_storage, repo_url)
 
     assert loader.load() == {"status": "eventful"}
     visit_status1 = assert_last_visit_matches(
@@ -166,7 +166,7 @@
     )[0]
     assert start_revision is not None
 
-    loader = SvnLoader(repo_url, swh_revision=start_revision)
+    loader = SvnLoader(swh_storage, repo_url, swh_revision=start_revision)
     assert loader.load() == {"status": "uneventful"}
 
     stats = get_stats(loader.storage)
@@ -183,7 +183,7 @@
     )
 
 
-def test_loader_tampered_repository(swh_config, datadir, tmp_path):
+def test_loader_tampered_repository(swh_storage, datadir, tmp_path):
     """In this scenario, the dump has been tampered with to modify the
     commit log [1]. This results in a hash divergence which is
     detected at startup after a new run for the same origin.
@@ -204,7 +204,7 @@
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
-    loader = SvnLoader(repo_url)
+    loader = SvnLoader(swh_storage, repo_url)
     assert loader.load() == {"status": "eventful"}
     check_snapshot(GOURMET_SNAPSHOT, loader.storage)
 
@@ -213,7 +213,7 @@
         archive_path2, archive_name, tmp_path
     )
 
-    loader2 = SvnLoader(repo_tampered_url, origin_url=repo_url)
+    loader2 = SvnLoader(swh_storage, repo_tampered_url, origin_url=repo_url)
     assert loader2.load() == {"status": "failed"}
 
     assert_last_visit_matches(
@@ -226,7 +226,7 @@
     assert stats["snapshot"] == 1
 
 
-def test_loader_svn_visit_with_changes(swh_config, datadir, tmp_path):
+def test_loader_svn_visit_with_changes(swh_storage, datadir, tmp_path):
     """In this scenario, the repository has been updated with new changes.
        The loading visit should result in new objects stored and 1 new
       snapshot.
@@ -239,7 +239,7 @@
     )
 
     # repo_initial_url becomes the origin_url we want to visit some more below
-    loader = SvnLoader(repo_initial_url)
+    loader = SvnLoader(swh_storage, repo_initial_url)
 
     assert loader.load() == {"status": "eventful"}
     visit_status1 = assert_last_visit_matches(
@@ -255,7 +255,7 @@
         archive_path, "pkg-gourmet", tmp_path
     )
 
-    loader = SvnLoader(repo_updated_url, origin_url=repo_initial_url,)
+    loader = SvnLoader(swh_storage, repo_updated_url, origin_url=repo_initial_url,)
 
     assert loader.load() == {"status": "eventful"}
     visit_status2 = assert_last_visit_matches(
@@ -286,7 +286,10 @@
 
     # Start from scratch loading yields the same result
     loader = SvnLoader(
-        repo_updated_url, origin_url=repo_initial_url, start_from_scratch=True
+        swh_storage,
+        repo_updated_url,
+        origin_url=repo_initial_url,
+        start_from_scratch=True,
     )
     assert loader.load() == {"status": "eventful"}
     visit_status3 = assert_last_visit_matches(
@@ -306,7 +309,7 @@
     assert stats["snapshot"] == 2  # no new snapshot
 
 
-def test_loader_svn_visit_start_from_revision(swh_config, datadir, tmp_path):
+def test_loader_svn_visit_start_from_revision(swh_storage, datadir, tmp_path):
     """Starting from existing revision, next visit on changed repo should yield 1 new
     snapshot.
 
@@ -318,7 +321,7 @@
     )
 
     # repo_initial_url becomes the origin_url we want to visit some more below
-    loader = SvnLoader(repo_initial_url)
+    loader = SvnLoader(swh_storage, repo_initial_url)
 
     assert loader.load() == {"status": "eventful"}
     visit_status1 = assert_last_visit_matches(
@@ -341,7 +344,10 @@
 
     # we'll start from start_revision
     loader = SvnLoader(
-        repo_updated_url, origin_url=repo_initial_url, swh_revision=start_revision
+        swh_storage,
+        repo_updated_url,
+        origin_url=repo_initial_url,
+        swh_revision=start_revision,
     )
 
     assert loader.load() == {"status": "eventful"}
@@ -373,7 +379,7 @@
     check_snapshot(GOURMET_UPDATES_SNAPSHOT, loader.storage)
 
 
-def test_loader_svn_visit_with_eol_style(swh_config, datadir, tmp_path):
+def test_loader_svn_visit_with_eol_style(swh_storage, datadir, tmp_path):
     """Check that a svn repo containing a versioned file with CRLF line
     endings with svn:eol-style property set to 'native' (this is a
     violation of svn specification as the file should have been
@@ -384,7 +390,7 @@
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
-    loader = SvnLoader(repo_url)
+    loader = SvnLoader(swh_storage, repo_url)
 
     assert loader.load() == {"status": "eventful"}
 
     mediawiki_snapshot = Snapshot(
@@ -412,7 +418,7 @@
     assert stats["snapshot"] == 1
 
 
-def test_loader_svn_visit_with_mixed_crlf_lf(swh_config, datadir, tmp_path):
+def test_loader_svn_visit_with_mixed_crlf_lf(swh_storage, datadir, tmp_path):
     """Check that a svn repo containing a versioned file with mixed
     CRLF/LF line endings with svn:eol-style property set to 'native'
     (this is a violation of svn specification as mixed line endings
@@ -424,7 +430,7 @@
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
-    loader = SvnLoader(repo_url)
+    loader = SvnLoader(swh_storage, repo_url)
 
     assert loader.load() == {"status": "eventful"}
 
     pyang_snapshot = Snapshot(
@@ -448,7 +454,7 @@
     assert stats["snapshot"] == 1
 
 
-def test_loader_svn_with_external_properties(swh_config, datadir, tmp_path):
+def test_loader_svn_with_external_properties(swh_storage, datadir, tmp_path):
     """Repository with svn:external properties cannot be fully ingested yet
 
     """
@@ -456,7 +462,7 @@
     archive_path = os.path.join(datadir, "pkg-gourmet-with-external-id.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
-    loader = SvnLoader(repo_url)
+    loader = SvnLoader(swh_storage, repo_url)
 
     assert loader.load() == {"status": "eventful"}
 
     gourmet_externals_snapshot = Snapshot(
@@ -487,7 +493,7 @@
     assert stats["revision"] == 21 - 1  # commit with the svn:external property
 
 
-def test_loader_svn_with_symlink(swh_config, datadir, tmp_path):
+def test_loader_svn_with_symlink(swh_storage, datadir, tmp_path):
     """Repository with symlinks should be ingested ok
 
     Edge case:
@@ -502,7 +508,7 @@
     )
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
-    loader = SvnLoader(repo_url)
+    loader = SvnLoader(swh_storage, repo_url)
 
     assert loader.load() == {"status": "eventful"}
 
     gourmet_edge_cases_snapshot = Snapshot(
@@ -531,7 +537,7 @@
     assert stats["revision"] == 19
 
 
-def test_loader_svn_with_wrong_symlinks(swh_config, datadir, tmp_path):
+def test_loader_svn_with_wrong_symlinks(swh_storage, datadir, tmp_path):
     """Repository with wrong symlinks should be ingested ok nonetheless
 
     Edge case:
@@ -543,7 +549,7 @@
     archive_path = os.path.join(datadir, "pkg-gourmet-with-wrong-link-cases.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
-    loader = SvnLoader(repo_url)
+    loader = SvnLoader(swh_storage, repo_url)
 
     assert loader.load() == {"status": "eventful"}
 
     gourmet_wrong_links_snapshot = Snapshot(
@@ -572,7 +578,7 @@
     assert stats["revision"] == 21
 
 
-def test_loader_svn_loader_from_dump_archive(swh_config, datadir, tmp_path):
+def test_loader_svn_loader_from_dump_archive(swh_storage, datadir, tmp_path):
     """Repository with wrong symlinks should be ingested ok nonetheless
 
     Edge case:
@@ -584,7 +590,7 @@
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
-    loaderFromDump = SvnLoaderFromRemoteDump(repo_url)
+    loaderFromDump = SvnLoaderFromRemoteDump(swh_storage, repo_url)
     assert loaderFromDump.load() == {"status": "eventful"}
     assert_last_visit_matches(
         loaderFromDump.storage,
@@ -595,7 +601,7 @@
     )
 
     origin_url = repo_url + "2"  # rename to another origin
-    loader = SvnLoader(repo_url, origin_url=origin_url)
+    loader = SvnLoader(swh_storage, repo_url, origin_url=origin_url)
     assert loader.load() == {"status": "eventful"}  # because are working on new origin
     assert_last_visit_matches(
         loader.storage,
@@ -612,7 +618,7 @@
     assert stats["origin_visit"] == 2
     assert stats["snapshot"] == 1
 
-    loader = SvnLoader(repo_url)  # no change on the origin-url
+    loader = SvnLoader(swh_storage, repo_url)  # no change on the origin-url
     assert loader.load() == {"status": "uneventful"}
     assert_last_visit_matches(
         loader.storage,
@@ -628,11 +634,11 @@
     assert stats["snapshot"] == 1
 
     # second visit from the dump should be uneventful
-    loaderFromDump = SvnLoaderFromRemoteDump(repo_url)
+    loaderFromDump = SvnLoaderFromRemoteDump(swh_storage, repo_url)
     assert loaderFromDump.load() == {"status": "uneventful"}
 
 
-def test_loader_user_defined_svn_properties(swh_config, datadir, tmp_path):
+def test_loader_user_defined_svn_properties(swh_storage, datadir, tmp_path):
     """Edge cases: The repository held some user defined svn-properties with special
        encodings, this prevented the repository from being loaded
        even though we do not ingest those information.
@@ -642,7 +648,7 @@
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
-    loader = SvnLoader(repo_url)
+    loader = SvnLoader(swh_storage, repo_url)
 
     assert loader.load() == {"status": "eventful"}
 
     expected_snapshot = Snapshot(
@@ -671,13 +677,13 @@
     assert stats["revision"] == 7
 
 
-def test_loader_svn_dir_added_then_removed(swh_config, datadir, tmp_path):
+def test_loader_svn_dir_added_then_removed(swh_storage, datadir, tmp_path):
     """Loader should handle directory removal when processing a commit"""
     archive_name = "pkg-gourmet"
     archive_path = os.path.join(datadir, f"{archive_name}-add-remove-dir.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
-    loader = SvnLoader(repo_url, destination_path=tmp_path)
+    loader = SvnLoader(swh_storage, repo_url, destination_path=tmp_path)
 
     assert loader.load() == {"status": "eventful"}
     assert loader.visit_status() == "full"
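
Note (not part of the diff): a minimal sketch of how the reworked constructor is expected to be called now that the loader no longer reads DEFAULT_CONFIG. The in-memory storage and the repository URL below are illustrative assumptions, not values taken from this change:

    from swh.storage import get_storage

    from swh.loader.svn.loader import SvnLoader

    # Any StorageInterface implementation works; an in-memory one keeps the sketch self-contained.
    storage = get_storage(cls="memory")

    loader = SvnLoader(
        storage,
        url="svn://example.org/some-repo",       # hypothetical repository URL
        visit_date="2021-02-03T15:17:13+00:00",  # ISO8601 string, parsed with iso8601.parse_date
        temp_directory="/tmp",
        check_revision=100,                      # verify loaded history every 100 revisions
    )
    result = loader.load()  # e.g. {"status": "eventful"}

The celery tasks instead go through SvnLoader.from_configfile(...), which, as suggested by the reworked swh_loader_config fixture, is expected to build the storage instance and the remaining keyword arguments from the loader configuration file.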