Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123072
D7690.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
D7690.diff
View Options
diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py
--- a/swh/loader/svn/loader.py
+++ b/swh/loader/svn/loader.py
@@ -85,7 +85,6 @@
temporary working directory is not cleaned up to ease inspection.
Defaults to false.
check_revision: The number of svn commits between checks for hash divergence
- max_content_size: Default max content size allowed
"""
# technical svn uri to act on svn repository
@@ -563,7 +562,7 @@
temp_directory: str = "/tmp",
debug: bool = False,
check_revision: int = 0,
- max_content_size: Optional[int] = None,
+ **kwargs: Any,
):
super().__init__(
storage=storage,
@@ -574,7 +573,7 @@
temp_directory=temp_directory,
debug=debug,
check_revision=check_revision,
- max_content_size=max_content_size,
+ **kwargs,
)
self.archive_path = archive_path
self.temp_dir = None
@@ -620,7 +619,7 @@
temp_directory: str = "/tmp",
debug: bool = False,
check_revision: int = 0,
- max_content_size: Optional[int] = None,
+ **kwargs: Any,
):
super().__init__(
storage=storage,
@@ -631,7 +630,7 @@
temp_directory=temp_directory,
debug=debug,
check_revision=check_revision,
- max_content_size=max_content_size,
+ **kwargs,
)
self.from_dump = True
self.temp_dir = self._create_tmp_dir(self.temp_directory)
diff --git a/swh/loader/svn/tasks.py b/swh/loader/svn/tasks.py
--- a/swh/loader/svn/tasks.py
+++ b/swh/loader/svn/tasks.py
@@ -1,111 +1,38 @@
-# Copyright (C) 2015-2021 The Software Heritage developers
+# Copyright (C) 2015-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from datetime import datetime
-from typing import Optional
from celery import shared_task
-import iso8601
from .loader import SvnLoader, SvnLoaderFromDumpArchive, SvnLoaderFromRemoteDump
-def convert_to_datetime(date: Optional[str]) -> Optional[datetime]:
- if date is None:
- return None
- try:
- assert isinstance(date, str)
- return iso8601.parse_date(date)
- except Exception:
- return None
-
-
@shared_task(name=__name__ + ".LoadSvnRepository")
-def load_svn(
- *,
- url: Optional[str] = None,
- origin_url: Optional[str] = None,
- visit_date: Optional[str] = None,
- incremental: Optional[bool] = True,
-):
- """Import a svn repository
-
- Args:
- url: (mandatory) svn's repository url to ingest data from
- origin_url: Optional original url override to use as origin reference in the
- archive. If not provided, "url" is used as origin.
- visit_date: Optional date to override the visit date
- incremental: If True, the default, starts from the last snapshot (if any).
- Otherwise, starts from the initial commit of the repository.
-
-
-
- """
- loader = SvnLoader.from_configfile(
- url=url,
- origin_url=origin_url,
- visit_date=convert_to_datetime(visit_date),
- incremental=incremental,
- )
+def load_svn(*args, **kwargs):
+ """Import a svn repository"""
+ loader = SvnLoader.from_configfile(*args, **kwargs)
return loader.load()
@shared_task(name=__name__ + ".MountAndLoadSvnRepository")
-def load_svn_from_archive(
- *,
- url: Optional[str] = None,
- archive_path: Optional[str] = None,
- visit_date: Optional[str] = None,
- incremental: Optional[bool] = True,
-):
- """1. Mount an svn dump from archive as a local svn repository
- 2. Load it through the svn loader
- 3. Clean up mounted svn repository archive
-
- Args:
- url: origin url
- archive_path: Path on disk to the archive holdin the svn repository to ingest
- visit_date: Optional date to override the visit date
- incremental: If True, the default, starts from the last snapshot (if any).
- Otherwise, starts from the initial commit of the repository.
-
+def load_svn_from_archive(*args, **kwargs):
+ """
+ 1. Mount an svn dump from archive as a local svn repository
+ 2. Load it through the svn loader
+ 3. Clean up mounted svn repository archive
"""
- loader = SvnLoaderFromDumpArchive.from_configfile(
- url=url,
- archive_path=archive_path,
- visit_date=convert_to_datetime(visit_date),
- incremental=incremental,
- )
+ loader = SvnLoaderFromDumpArchive.from_configfile(*args, **kwargs)
return loader.load()
@shared_task(name=__name__ + ".DumpMountAndLoadSvnRepository")
-def load_svn_from_remote_dump(
- *,
- url: Optional[str] = None,
- origin_url: Optional[str] = None,
- visit_date: Optional[str] = None,
- incremental: Optional[bool] = True,
-):
- """1. Mount a remote svn dump as a local svn repository.
- 2. Load it through the svn loader.
- 3. Clean up mounted svn repository archive.
-
- Args:
- url: (mandatory) svn's repository url to ingest data from
- origin_url: Optional original url override to use as origin reference
- in the archive. If not provided, "url" is used as origin.
- visit_date: Optional date to override the visit date
- incremental: If True, the default, starts from the last snapshot (if any).
- Otherwise, starts from the initial commit of the repository.
-
+def load_svn_from_remote_dump(*args, **kwargs):
+ """
+ 1. Mount a remote svn dump as a local svn repository.
+ 2. Load it through the svn loader.
+ 3. Clean up mounted svn repository archive.
"""
- loader = SvnLoaderFromRemoteDump.from_configfile(
- url=url,
- origin_url=origin_url,
- visit_date=convert_to_datetime(visit_date),
- incremental=incremental,
- )
+ loader = SvnLoaderFromRemoteDump.from_configfile(*args, **kwargs)
return loader.load()
diff --git a/swh/loader/svn/tests/test_task.py b/swh/loader/svn/tests/test_task.py
--- a/swh/loader/svn/tests/test_task.py
+++ b/swh/loader/svn/tests/test_task.py
@@ -1,17 +1,41 @@
-# Copyright (C) 2019-2021 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from datetime import datetime, timezone
+import uuid
import pytest
-from swh.loader.svn.tasks import convert_to_datetime
+from swh.scheduler.model import ListedOrigin, Lister
+from swh.scheduler.utils import create_origin_task_dict
+
+
+@pytest.fixture(autouse=True)
+def celery_worker_and_swh_config(swh_scheduler_celery_worker, swh_config):
+ pass
+
+
+@pytest.fixture
+def svn_lister():
+ return Lister(name="svn-lister", instance_name="example", id=uuid.uuid4())
+
+
+@pytest.fixture
+def svn_listed_origin(svn_lister):
+ return ListedOrigin(
+ lister_id=svn_lister.id, url="svn://example.org/repo", visit_type="svn"
+ )
+
+
+@pytest.fixture
+def task_dict(svn_lister, svn_listed_origin):
+ return create_origin_task_dict(svn_listed_origin, svn_lister)
def test_svn_loader(
- mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
+ mocker,
+ swh_scheduler_celery_app,
):
mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoader.load")
mock_loader.return_value = {"status": "eventful"}
@@ -27,8 +51,29 @@
assert res.result == {"status": "eventful"}
+def test_svn_loader_for_listed_origin(
+ mocker,
+ swh_scheduler_celery_app,
+ task_dict,
+):
+ mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoader.load")
+ mock_loader.return_value = {"status": "eventful"}
+
+ res = swh_scheduler_celery_app.send_task(
+ "swh.loader.svn.tasks.LoadSvnRepository",
+ args=task_dict["arguments"]["args"],
+ kwargs=task_dict["arguments"]["kwargs"],
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+
+ assert res.result == {"status": "eventful"}
+
+
def test_svn_loader_from_dump(
- mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
+ mocker,
+ swh_scheduler_celery_app,
):
mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoaderFromDumpArchive.load")
mock_loader.return_value = {"status": "eventful"}
@@ -44,8 +89,34 @@
assert res.result == {"status": "eventful"}
+def test_svn_loader_from_dump_for_listed_origin(
+ mocker,
+ swh_scheduler_celery_app,
+ svn_lister,
+ svn_listed_origin,
+):
+ mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoaderFromDumpArchive.load")
+ mock_loader.return_value = {"status": "eventful"}
+
+ svn_listed_origin.extra_loader_arguments = {"archive_path": "some-path"}
+
+ task_dict = create_origin_task_dict(svn_listed_origin, svn_lister)
+
+ res = swh_scheduler_celery_app.send_task(
+ "swh.loader.svn.tasks.MountAndLoadSvnRepository",
+ args=task_dict["arguments"]["args"],
+ kwargs=task_dict["arguments"]["kwargs"],
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+
+ assert res.result == {"status": "eventful"}
+
+
def test_svn_loader_from_remote_dump(
- mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
+ mocker,
+ swh_scheduler_celery_app,
):
mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoaderFromRemoteDump.load")
mock_loader.return_value = {"status": "eventful"}
@@ -61,19 +132,21 @@
assert res.result == {"status": "eventful"}
-@pytest.mark.parametrize(
- "date,expected_result",
- [
- (None, None),
- (
- "2021-11-23 09:41:02.434195+00:00",
- datetime(2021, 11, 23, 9, 41, 2, 434195, tzinfo=timezone.utc),
- ),
- (
- "23112021",
- None,
- ), # failure to parse
- ],
-)
-def test_convert_to_datetime(date, expected_result):
- assert convert_to_datetime(date) == expected_result
+def test_svn_loader_from_remote_dump_for_listed_origin(
+ mocker,
+ swh_scheduler_celery_app,
+ task_dict,
+):
+ mock_loader = mocker.patch("swh.loader.svn.loader.SvnLoaderFromRemoteDump.load")
+ mock_loader.return_value = {"status": "eventful"}
+
+ res = swh_scheduler_celery_app.send_task(
+ "swh.loader.svn.tasks.DumpMountAndLoadSvnRepository",
+ args=task_dict["arguments"]["args"],
+ kwargs=task_dict["arguments"]["kwargs"],
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+
+ assert res.result == {"status": "eventful"}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Dec 17, 7:32 PM (2 d, 16 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217703
Attached To
D7690: tasks: Fix and simplify implementation
Event Timeline
Log In to Comment