diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py --- a/swh/loader/svn/loader.py +++ b/swh/loader/svn/loader.py @@ -85,7 +85,6 @@ temporary working directory is not cleaned up to ease inspection. Defaults to false. check_revision: The number of svn commits between checks for hash divergence - max_content_size: Default max content size allowed """ # technical svn uri to act on svn repository @@ -563,7 +562,7 @@ temp_directory: str = "/tmp", debug: bool = False, check_revision: int = 0, - max_content_size: Optional[int] = None, + **kwargs: Any, ): super().__init__( storage=storage, @@ -574,7 +573,7 @@ temp_directory=temp_directory, debug=debug, check_revision=check_revision, - max_content_size=max_content_size, + **kwargs, ) self.archive_path = archive_path self.temp_dir = None @@ -620,7 +619,7 @@ temp_directory: str = "/tmp", debug: bool = False, check_revision: int = 0, - max_content_size: Optional[int] = None, + **kwargs: Any, ): super().__init__( storage=storage, @@ -631,7 +630,7 @@ temp_directory=temp_directory, debug=debug, check_revision=check_revision, - max_content_size=max_content_size, + **kwargs, ) self.from_dump = True self.temp_dir = self._create_tmp_dir(self.temp_directory) diff --git a/swh/loader/svn/tasks.py b/swh/loader/svn/tasks.py --- a/swh/loader/svn/tasks.py +++ b/swh/loader/svn/tasks.py @@ -1,111 +1,38 @@ -# Copyright (C) 2015-2021 The Software Heritage developers +# Copyright (C) 2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from datetime import datetime -from typing import Optional from celery import shared_task -import iso8601 from .loader import SvnLoader, SvnLoaderFromDumpArchive, SvnLoaderFromRemoteDump -def convert_to_datetime(date: Optional[str]) -> Optional[datetime]: - if date is None: - return None - try: - assert isinstance(date, str) - return iso8601.parse_date(date) - except Exception: - return None - - @shared_task(name=__name__ + ".LoadSvnRepository") -def load_svn( - *, - url: Optional[str] = None, - origin_url: Optional[str] = None, - visit_date: Optional[str] = None, - incremental: Optional[bool] = True, -): - """Import a svn repository - - Args: - url: (mandatory) svn's repository url to ingest data from - origin_url: Optional original url override to use as origin reference in the - archive. If not provided, "url" is used as origin. - visit_date: Optional date to override the visit date - incremental: If True, the default, starts from the last snapshot (if any). - Otherwise, starts from the initial commit of the repository. - - - - """ - loader = SvnLoader.from_configfile( - url=url, - origin_url=origin_url, - visit_date=convert_to_datetime(visit_date), - incremental=incremental, - ) +def load_svn(*args, **kwargs): + """Import a svn repository""" + loader = SvnLoader.from_configfile(*args, **kwargs) return loader.load() @shared_task(name=__name__ + ".MountAndLoadSvnRepository") -def load_svn_from_archive( - *, - url: Optional[str] = None, - archive_path: Optional[str] = None, - visit_date: Optional[str] = None, - incremental: Optional[bool] = True, -): - """1. Mount an svn dump from archive as a local svn repository - 2. Load it through the svn loader - 3. Clean up mounted svn repository archive - - Args: - url: origin url - archive_path: Path on disk to the archive holdin the svn repository to ingest - visit_date: Optional date to override the visit date - incremental: If True, the default, starts from the last snapshot (if any). - Otherwise, starts from the initial commit of the repository. - +def load_svn_from_archive(*args, **kwargs): + """ + 1. Mount an svn dump from archive as a local svn repository + 2. Load it through the svn loader + 3. Clean up mounted svn repository archive """ - loader = SvnLoaderFromDumpArchive.from_configfile( - url=url, - archive_path=archive_path, - visit_date=convert_to_datetime(visit_date), - incremental=incremental, - ) + loader = SvnLoaderFromDumpArchive.from_configfile(*args, **kwargs) return loader.load() @shared_task(name=__name__ + ".DumpMountAndLoadSvnRepository") -def load_svn_from_remote_dump( - *, - url: Optional[str] = None, - origin_url: Optional[str] = None, - visit_date: Optional[str] = None, - incremental: Optional[bool] = True, -): - """1. Mount a remote svn dump as a local svn repository. - 2. Load it through the svn loader. - 3. Clean up mounted svn repository archive. - - Args: - url: (mandatory) svn's repository url to ingest data from - origin_url: Optional original url override to use as origin reference - in the archive. If not provided, "url" is used as origin. - visit_date: Optional date to override the visit date - incremental: If True, the default, starts from the last snapshot (if any). - Otherwise, starts from the initial commit of the repository. - +def load_svn_from_remote_dump(*args, **kwargs): + """ + 1. Mount a remote svn dump as a local svn repository. + 2. Load it through the svn loader. + 3. Clean up mounted svn repository archive. """ - loader = SvnLoaderFromRemoteDump.from_configfile( - url=url, - origin_url=origin_url, - visit_date=convert_to_datetime(visit_date), - incremental=incremental, - ) + loader = SvnLoaderFromRemoteDump.from_configfile(*args, **kwargs) return loader.load() diff --git a/swh/loader/svn/tests/test_task.py b/swh/loader/svn/tests/test_task.py --- a/swh/loader/svn/tests/test_task.py +++ b/swh/loader/svn/tests/test_task.py @@ -1,17 +1,41 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from datetime import datetime, timezone +import uuid import pytest -from swh.loader.svn.tasks import convert_to_datetime +from swh.scheduler.model import ListedOrigin, Lister +from swh.scheduler.utils import create_origin_task_dict + + +@pytest.fixture(autouse=True) +def celery_worker_and_swh_config(swh_scheduler_celery_worker, swh_config): + pass + + +@pytest.fixture +def svn_lister(): + return Lister(name="svn-lister", instance_name="example", id=uuid.uuid4()) + + +@pytest.fixture +def svn_listed_origin(svn_lister): + return ListedOrigin( + lister_id=svn_lister.id, url="svn://example.org/repo", visit_type="svn" + ) + + +@pytest.fixture +def task_dict(svn_lister, svn_listed_origin): + return create_origin_task_dict(svn_listed_origin, svn_lister) def test_svn_loader( - mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config + mocker, + swh_scheduler_celery_app, ): mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoader.load") mock_loader.return_value = {"status": "eventful"} @@ -27,8 +51,29 @@ assert res.result == {"status": "eventful"} +def test_svn_loader_for_listed_origin( + mocker, + swh_scheduler_celery_app, + task_dict, +): + mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoader.load") + mock_loader.return_value = {"status": "eventful"} + + res = swh_scheduler_celery_app.send_task( + "swh.loader.svn.tasks.LoadSvnRepository", + args=task_dict["arguments"]["args"], + kwargs=task_dict["arguments"]["kwargs"], + ) + assert res + res.wait() + assert res.successful() + + assert res.result == {"status": "eventful"} + + def test_svn_loader_from_dump( - mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config + mocker, + swh_scheduler_celery_app, ): mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoaderFromDumpArchive.load") mock_loader.return_value = {"status": "eventful"} @@ -44,8 +89,34 @@ assert res.result == {"status": "eventful"} +def test_svn_loader_from_dump_for_listed_origin( + mocker, + swh_scheduler_celery_app, + svn_lister, + svn_listed_origin, +): + mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoaderFromDumpArchive.load") + mock_loader.return_value = {"status": "eventful"} + + svn_listed_origin.extra_loader_arguments = {"archive_path": "some-path"} + + task_dict = create_origin_task_dict(svn_listed_origin, svn_lister) + + res = swh_scheduler_celery_app.send_task( + "swh.loader.svn.tasks.MountAndLoadSvnRepository", + args=task_dict["arguments"]["args"], + kwargs=task_dict["arguments"]["kwargs"], + ) + assert res + res.wait() + assert res.successful() + + assert res.result == {"status": "eventful"} + + def test_svn_loader_from_remote_dump( - mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config + mocker, + swh_scheduler_celery_app, ): mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoaderFromRemoteDump.load") mock_loader.return_value = {"status": "eventful"} @@ -61,19 +132,21 @@ assert res.result == {"status": "eventful"} -@pytest.mark.parametrize( - "date,expected_result", - [ - (None, None), - ( - "2021-11-23 09:41:02.434195+00:00", - datetime(2021, 11, 23, 9, 41, 2, 434195, tzinfo=timezone.utc), - ), - ( - "23112021", - None, - ), # failure to parse - ], -) -def test_convert_to_datetime(date, expected_result): - assert convert_to_datetime(date) == expected_result +def test_svn_loader_from_remote_dump_for_listed_origin( + mocker, + swh_scheduler_celery_app, + task_dict, +): + mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoaderFromRemoteDump.load") + mock_loader.return_value = {"status": "eventful"} + + res = swh_scheduler_celery_app.send_task( + "swh.loader.svn.tasks.DumpMountAndLoadSvnRepository", + args=task_dict["arguments"]["args"], + kwargs=task_dict["arguments"]["kwargs"], + ) + assert res + res.wait() + assert res.successful() + + assert res.result == {"status": "eventful"}