Page Menu | Home | Software Heritage

D7690.diff
No One | Temporary

D7690.diff

diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -85,7 +85,6 @@
temporary working directory is not cleaned up to ease inspection.
Defaults to false.
check_revision: The number of svn commits between checks for hash divergence
- max_content_size: Default max content size allowed
"""
# technical svn uri to act on svn repository
@@ -563,7 +562,7 @@
temp_directory: str = "/tmp",
debug: bool = False,
check_revision: int = 0,
- max_content_size: Optional[int] = None,
+ **kwargs: Any,
):
super().__init__(
storage=storage,
@@ -574,7 +573,7 @@
temp_directory=temp_directory,
debug=debug,
check_revision=check_revision,
- max_content_size=max_content_size,
+ **kwargs,
)
self.archive_path = archive_path
self.temp_dir = None
@@ -620,7 +619,7 @@
temp_directory: str = "/tmp",
debug: bool = False,
check_revision: int = 0,
- max_content_size: Optional[int] = None,
+ **kwargs: Any,
):
super().__init__(
storage=storage,
@@ -631,7 +630,7 @@
temp_directory=temp_directory,
debug=debug,
check_revision=check_revision,
- max_content_size=max_content_size,
+ **kwargs,
)
self.from_dump = True
self.temp_dir = self._create_tmp_dir(self.temp_directory)
diff --git a/swh/loader/svn/tasks.py b/swh/loader/svn/tasks.py
--- a/swh/loader/svn/tasks.py
+++ b/swh/loader/svn/tasks.py
@@ -1,111 +1,38 @@
-# Copyright (C) 2015-2021 The Software Heritage developers
+# Copyright (C) 2015-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from datetime import datetime
-from typing import Optional
from celery import shared_task
-import iso8601
from .loader import SvnLoader, SvnLoaderFromDumpArchive, SvnLoaderFromRemoteDump
-def convert_to_datetime(date: Optional[str]) -> Optional[datetime]:
- if date is None:
- return None
- try:
- assert isinstance(date, str)
- return iso8601.parse_date(date)
- except Exception:
- return None
-
-
@shared_task(name=__name__ + ".LoadSvnRepository")
-def load_svn(
- *,
- url: Optional[str] = None,
- origin_url: Optional[str] = None,
- visit_date: Optional[str] = None,
- incremental: Optional[bool] = True,
-):
- """Import a svn repository
-
- Args:
- url: (mandatory) svn's repository url to ingest data from
- origin_url: Optional original url override to use as origin reference in the
- archive. If not provided, "url" is used as origin.
- visit_date: Optional date to override the visit date
- incremental: If True, the default, starts from the last snapshot (if any).
- Otherwise, starts from the initial commit of the repository.
-
-
-
- """
- loader = SvnLoader.from_configfile(
- url=url,
- origin_url=origin_url,
- visit_date=convert_to_datetime(visit_date),
- incremental=incremental,
- )
+def load_svn(*args, **kwargs):
+ """Import a svn repository"""
+ loader = SvnLoader.from_configfile(*args, **kwargs)
return loader.load()
@shared_task(name=__name__ + ".MountAndLoadSvnRepository")
-def load_svn_from_archive(
- *,
- url: Optional[str] = None,
- archive_path: Optional[str] = None,
- visit_date: Optional[str] = None,
- incremental: Optional[bool] = True,
-):
- """1. Mount an svn dump from archive as a local svn repository
- 2. Load it through the svn loader
- 3. Clean up mounted svn repository archive
-
- Args:
- url: origin url
- archive_path: Path on disk to the archive holdin the svn repository to ingest
- visit_date: Optional date to override the visit date
- incremental: If True, the default, starts from the last snapshot (if any).
- Otherwise, starts from the initial commit of the repository.
-
+def load_svn_from_archive(*args, **kwargs):
+ """
+ 1. Mount an svn dump from archive as a local svn repository
+ 2. Load it through the svn loader
+ 3. Clean up mounted svn repository archive
"""
- loader = SvnLoaderFromDumpArchive.from_configfile(
- url=url,
- archive_path=archive_path,
- visit_date=convert_to_datetime(visit_date),
- incremental=incremental,
- )
+ loader = SvnLoaderFromDumpArchive.from_configfile(*args, **kwargs)
return loader.load()
@shared_task(name=__name__ + ".DumpMountAndLoadSvnRepository")
-def load_svn_from_remote_dump(
- *,
- url: Optional[str] = None,
- origin_url: Optional[str] = None,
- visit_date: Optional[str] = None,
- incremental: Optional[bool] = True,
-):
- """1. Mount a remote svn dump as a local svn repository.
- 2. Load it through the svn loader.
- 3. Clean up mounted svn repository archive.
-
- Args:
- url: (mandatory) svn's repository url to ingest data from
- origin_url: Optional original url override to use as origin reference
- in the archive. If not provided, "url" is used as origin.
- visit_date: Optional date to override the visit date
- incremental: If True, the default, starts from the last snapshot (if any).
- Otherwise, starts from the initial commit of the repository.
-
+def load_svn_from_remote_dump(*args, **kwargs):
+ """
+ 1. Mount a remote svn dump as a local svn repository.
+ 2. Load it through the svn loader.
+ 3. Clean up mounted svn repository archive.
"""
- loader = SvnLoaderFromRemoteDump.from_configfile(
- url=url,
- origin_url=origin_url,
- visit_date=convert_to_datetime(visit_date),
- incremental=incremental,
- )
+ loader = SvnLoaderFromRemoteDump.from_configfile(*args, **kwargs)
return loader.load()
diff --git a/swh/loader/svn/tests/test_task.py b/swh/loader/svn/tests/test_task.py
--- a/swh/loader/svn/tests/test_task.py
+++ b/swh/loader/svn/tests/test_task.py
@@ -1,17 +1,41 @@
-# Copyright (C) 2019-2021 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from datetime import datetime, timezone
+import uuid
import pytest
-from swh.loader.svn.tasks import convert_to_datetime
+from swh.scheduler.model import ListedOrigin, Lister
+from swh.scheduler.utils import create_origin_task_dict
+
+
+@pytest.fixture(autouse=True)
+def celery_worker_and_swh_config(swh_scheduler_celery_worker, swh_config):
+ pass
+
+
+@pytest.fixture
+def svn_lister():
+ return Lister(name="svn-lister", instance_name="example", id=uuid.uuid4())
+
+
+@pytest.fixture
+def svn_listed_origin(svn_lister):
+ return ListedOrigin(
+ lister_id=svn_lister.id, url="svn://example.org/repo", visit_type="svn"
+ )
+
+
+@pytest.fixture
+def task_dict(svn_lister, svn_listed_origin):
+ return create_origin_task_dict(svn_listed_origin, svn_lister)
def test_svn_loader(
- mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
+ mocker,
+ swh_scheduler_celery_app,
):
mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoader.load")
mock_loader.return_value = {"status": "eventful"}
@@ -27,8 +51,29 @@
assert res.result == {"status": "eventful"}
+def test_svn_loader_for_listed_origin(
+ mocker,
+ swh_scheduler_celery_app,
+ task_dict,
+):
+ mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoader.load")
+ mock_loader.return_value = {"status": "eventful"}
+
+ res = swh_scheduler_celery_app.send_task(
+ "swh.loader.svn.tasks.LoadSvnRepository",
+ args=task_dict["arguments"]["args"],
+ kwargs=task_dict["arguments"]["kwargs"],
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+
+ assert res.result == {"status": "eventful"}
+
+
def test_svn_loader_from_dump(
- mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
+ mocker,
+ swh_scheduler_celery_app,
):
mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoaderFromDumpArchive.load")
mock_loader.return_value = {"status": "eventful"}
@@ -44,8 +89,34 @@
assert res.result == {"status": "eventful"}
+def test_svn_loader_from_dump_for_listed_origin(
+ mocker,
+ swh_scheduler_celery_app,
+ svn_lister,
+ svn_listed_origin,
+):
+ mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoaderFromDumpArchive.load")
+ mock_loader.return_value = {"status": "eventful"}
+
+ svn_listed_origin.extra_loader_arguments = {"archive_path": "some-path"}
+
+ task_dict = create_origin_task_dict(svn_listed_origin, svn_lister)
+
+ res = swh_scheduler_celery_app.send_task(
+ "swh.loader.svn.tasks.MountAndLoadSvnRepository",
+ args=task_dict["arguments"]["args"],
+ kwargs=task_dict["arguments"]["kwargs"],
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+
+ assert res.result == {"status": "eventful"}
+
+
def test_svn_loader_from_remote_dump(
- mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
+ mocker,
+ swh_scheduler_celery_app,
):
mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoaderFromRemoteDump.load")
mock_loader.return_value = {"status": "eventful"}
@@ -61,19 +132,21 @@
assert res.result == {"status": "eventful"}
-@pytest.mark.parametrize(
- "date,expected_result",
- [
- (None, None),
- (
- "2021-11-23 09:41:02.434195+00:00",
- datetime(2021, 11, 23, 9, 41, 2, 434195, tzinfo=timezone.utc),
- ),
- (
- "23112021",
- None,
- ), # failure to parse
- ],
-)
-def test_convert_to_datetime(date, expected_result):
- assert convert_to_datetime(date) == expected_result
+def test_svn_loader_from_remote_dump_for_listed_origin(
+ mocker,
+ swh_scheduler_celery_app,
+ task_dict,
+):
+ mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoaderFromRemoteDump.load")
+ mock_loader.return_value = {"status": "eventful"}
+
+ res = swh_scheduler_celery_app.send_task(
+ "swh.loader.svn.tasks.DumpMountAndLoadSvnRepository",
+ args=task_dict["arguments"]["args"],
+ kwargs=task_dict["arguments"]["kwargs"],
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+
+ assert res.result == {"status": "eventful"}

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 7:32 PM (1 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217703

Event Timeline