diff --git a/swh/loader/svn/tasks.py b/swh/loader/svn/tasks.py
index 0009255..de72c68 100644
--- a/swh/loader/svn/tasks.py
+++ b/swh/loader/svn/tasks.py
@@ -1,106 +1,119 @@
 # Copyright (C) 2015-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from datetime import datetime
 from typing import Optional
 
 from celery import shared_task
 import iso8601
 
 from .loader import SvnLoader, SvnLoaderFromDumpArchive, SvnLoaderFromRemoteDump
 
 
 def convert_to_datetime(date: Optional[str]) -> Optional[datetime]:
+    """Convert an ISO 8601 date string to a datetime.
+
+    Args:
+        date: Date string to parse (e.g. a task's ``visit_date``), or None
+
+    Returns:
+        The parsed datetime, or None if ``date`` is None, is not a string or
+        cannot be parsed.
+
+    """
+    if not isinstance(date, str):
+        # Explicit guard rather than an assert: asserts are stripped under -O
+        return None
     try:
         return iso8601.parse_date(date)
     except Exception:
         return None
 
 
 @shared_task(name=__name__ + ".LoadSvnRepository")
 def load_svn(
     *,
     url: Optional[str] = None,
     origin_url: Optional[str] = None,
     visit_date: Optional[str] = None,
     incremental: Optional[bool] = True,
 ):
     """Import a svn repository
 
     Args:
         url: (mandatory) svn's repository url to ingest data from
         origin_url: Optional original url override to use as origin reference
             in the archive. If not provided, "url" is used as origin.
         visit_date: Optional date to override the visit date
         incremental: If True, the default, starts from the last snapshot (if any).
             Otherwise, starts from the initial commit of the repository.
 
     """
     loader = SvnLoader.from_configfile(
         url=url,
         origin_url=origin_url,
         visit_date=convert_to_datetime(visit_date),
         incremental=incremental,
     )
     return loader.load()
 
 
 @shared_task(name=__name__ + ".MountAndLoadSvnRepository")
 def load_svn_from_archive(
     *,
     url: Optional[str] = None,
     archive_path: Optional[str] = None,
     visit_date: Optional[str] = None,
     incremental: Optional[bool] = True,
 ):
     """1. Mount an svn dump from archive as a local svn repository
        2. Load it through the svn loader
        3. Clean up mounted svn repository archive
 
     Args:
         url: origin url
-        archive_path: Path on disk to the archive holdin the svn repository to ingest
+        archive_path: Path on disk to the archive holding the svn repository to ingest
         visit_date: Optional date to override the visit date
         incremental: If True, the default, starts from the last snapshot (if any).
             Otherwise, starts from the initial commit of the repository.
 
     """
     loader = SvnLoaderFromDumpArchive.from_configfile(
         url=url,
         archive_path=archive_path,
         visit_date=convert_to_datetime(visit_date),
         incremental=incremental,
     )
     return loader.load()
 
 
 @shared_task(name=__name__ + ".DumpMountAndLoadSvnRepository")
 def load_svn_from_remote_dump(
     *,
     url: Optional[str] = None,
     origin_url: Optional[str] = None,
     visit_date: Optional[str] = None,
     incremental: Optional[bool] = True,
 ):
     """1. Mount a remote svn dump as a local svn repository.
        2. Load it through the svn loader.
        3. Clean up mounted svn repository archive.
 
     Args:
         url: (mandatory) svn's repository url to ingest data from
         origin_url: Optional original url override to use as origin reference
             in the archive. If not provided, "url" is used as origin.
         visit_date: Optional date to override the visit date
         incremental: If True, the default, starts from the last snapshot (if any).
             Otherwise, starts from the initial commit of the repository.
 
     """
     loader = SvnLoaderFromRemoteDump.from_configfile(
         url=url,
         origin_url=origin_url,
         visit_date=convert_to_datetime(visit_date),
         incremental=incremental,
     )
     return loader.load()
diff --git a/swh/loader/svn/tests/test_task.py b/swh/loader/svn/tests/test_task.py
index 1911e39..f1dbcfa 100644
--- a/swh/loader/svn/tests/test_task.py
+++ b/swh/loader/svn/tests/test_task.py
@@ -1,55 +1,76 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from datetime import datetime, timezone
+
+import pytest
+
+from swh.loader.svn.tasks import convert_to_datetime
+
 
 def test_svn_loader(
     mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
 ):
     mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoader.load")
     mock_loader.return_value = {"status": "eventful"}
 
     res = swh_scheduler_celery_app.send_task(
         "swh.loader.svn.tasks.LoadSvnRepository",
         kwargs=dict(url="some-technical-url", origin_url="origin-url"),
     )
     assert res
     res.wait()
     assert res.successful()
 
     assert res.result == {"status": "eventful"}
 
 
 def test_svn_loader_from_dump(
     mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
 ):
     mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoaderFromDumpArchive.load")
     mock_loader.return_value = {"status": "eventful"}
 
     res = swh_scheduler_celery_app.send_task(
         "swh.loader.svn.tasks.MountAndLoadSvnRepository",
         kwargs=dict(url="some-url", archive_path="some-path"),
     )
     assert res
     res.wait()
     assert res.successful()
 
     assert res.result == {"status": "eventful"}
 
 
 def test_svn_loader_from_remote_dump(
     mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
 ):
     mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoaderFromRemoteDump.load")
     mock_loader.return_value = {"status": "eventful"}
 
     res = swh_scheduler_celery_app.send_task(
         "swh.loader.svn.tasks.DumpMountAndLoadSvnRepository",
         kwargs=dict(url="some-remote-dump-url", origin_url="origin-url"),
     )
     assert res
     res.wait()
     assert res.successful()
 
     assert res.result == {"status": "eventful"}
+
+
+@pytest.mark.parametrize(
+    "date,expected_result",
+    [
+        (None, None),
+        (
+            "2021-11-23 09:41:02.434195+00:00",
+            datetime(2021, 11, 23, 9, 41, 2, 434195, tzinfo=timezone.utc),
+        ),
+        ("23112021", None,),  # failure to parse
+    ],
+)
+def test_convert_to_datetime(date, expected_result):
+    assert convert_to_datetime(date) == expected_result