# Patch summary (text ahead of the first "diff --git" header is commentary
# and is not part of any hunk).
#
# requirements-test.txt
#   * Removes the types-python-dateutil entry.
#
# swh/loader/git/tasks.py
#   * Adds _process_kwargs(kwargs): when a "visit_date" key is present, its
#     value is replaced in place by parse_visit_date(value) (imported from
#     swh.loader.core.utils); the same dict is returned.
#   * load_git, load_git_from_dir and load_git_from_zip now take **kwargs and
#     forward _process_kwargs(kwargs) to the matching loader's
#     from_configfile(), then return loader.load().
#   * The explicit url / base_url / directory / archive_path / date parameters
#     and the dateutil.parser import are removed.
#   * NOTE(review): a "date" keyword is no longer parsed or renamed; it would
#     be forwarded unchanged to from_configfile() -- confirm that no caller
#     still sends "date" instead of "visit_date".
#
# swh/loader/git/tests/test_tasks.py
#   * Adds an autouse fixture that requests swh_scheduler_celery_worker and
#     swh_config, so the tests no longer list them as parameters.
#   * Adds git_lister / git_listed_origin fixtures and one "*_for_listed_origin"
#     test per task; each sends the task with
#     create_origin_task_dict(...)["arguments"]["kwargs"] as its kwargs.
#   * Existing disk/archive tests now pass "visit_date": "now" instead of a
#     "date" string.

diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -4,4 +4,3 @@
 swh.storage[testing]
 types-click
 types-Deprecated
-types-python-dateutil
diff --git a/swh/loader/git/tasks.py b/swh/loader/git/tasks.py
--- a/swh/loader/git/tasks.py
+++ b/swh/loader/git/tasks.py
@@ -1,41 +1,39 @@
-# Copyright (C) 2015-2021 The Software Heritage developers
+# Copyright (C) 2015-2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-from typing import Any, Dict, Optional
+from typing import Any, Dict
 
 from celery import shared_task
-import dateutil.parser
 
+from swh.loader.core.utils import parse_visit_date
 from swh.loader.git.from_disk import GitLoaderFromArchive, GitLoaderFromDisk
 from swh.loader.git.loader import GitLoader
 
 
+def _process_kwargs(kwargs):
+    if "visit_date" in kwargs:
+        kwargs["visit_date"] = parse_visit_date(kwargs["visit_date"])
+    return kwargs
+
+
 @shared_task(name=__name__ + ".UpdateGitRepository")
-def load_git(*, url: str, base_url: Optional[str] = None) -> Dict[str, Any]:
+def load_git(**kwargs) -> Dict[str, Any]:
     """Import a git repository from a remote location"""
-    loader = GitLoader.from_configfile(url=url, base_url=base_url)
+    loader = GitLoader.from_configfile(**_process_kwargs(kwargs))
     return loader.load()
 
 
 @shared_task(name=__name__ + ".LoadDiskGitRepository")
-def load_git_from_dir(*, url: str, directory: str, date: str) -> Dict[str, Any]:
-    """Import a git repository from a local repository
-
-    Import a git repository, cloned in `directory` from `origin_url` at
-    `date`.
-
-    """
-    visit_date = dateutil.parser.parse(date)
-    loader = GitLoaderFromDisk.from_configfile(
-        url=url, directory=directory, visit_date=visit_date
-    )
+def load_git_from_dir(**kwargs) -> Dict[str, Any]:
+    """Import a git repository from a local repository"""
+    loader = GitLoaderFromDisk.from_configfile(**_process_kwargs(kwargs))
     return loader.load()
 
 
 @shared_task(name=__name__ + ".UncompressAndLoadDiskGitRepository")
-def load_git_from_zip(*, url: str, archive_path: str, date: str) -> Dict[str, Any]:
+def load_git_from_zip(**kwargs) -> Dict[str, Any]:
     """Import a git repository from a zip archive
 
     1. Uncompress an archive repository in a local and temporary folder
@@ -43,8 +41,5 @@
     3. Clean up the temporary folder
 
     """
-    visit_date = dateutil.parser.parse(date)
-    loader = GitLoaderFromArchive.from_configfile(
-        url=url, archive_path=archive_path, visit_date=visit_date
-    )
+    loader = GitLoaderFromArchive.from_configfile(**_process_kwargs(kwargs))
     return loader.load()
diff --git a/swh/loader/git/tests/test_tasks.py b/swh/loader/git/tests/test_tasks.py
--- a/swh/loader/git/tests/test_tasks.py
+++ b/swh/loader/git/tests/test_tasks.py
@@ -1,20 +1,66 @@
-# Copyright (C) 2018-2020 The Software Heritage developers
+# Copyright (C) 2018-2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import uuid
+
+import pytest
+
+from swh.scheduler.model import ListedOrigin, Lister
+from swh.scheduler.utils import create_origin_task_dict
+
+
+@pytest.fixture(autouse=True)
+def celery_worker_and_swh_config(swh_scheduler_celery_worker, swh_config):
+    pass
+
+
+@pytest.fixture
+def git_lister():
+    return Lister(name="git-lister", instance_name="example", id=uuid.uuid4())
+
+
+@pytest.fixture
+def git_listed_origin(git_lister):
+    return ListedOrigin(
+        lister_id=git_lister.id, url="https://git.example.org/repo", visit_type="git"
+    )
+
 
 def test_git_loader(
-    mocker, swh_config, swh_scheduler_celery_app, swh_scheduler_celery_worker
+    mocker,
+    swh_scheduler_celery_app,
 ):
     mock_loader = mocker.patch("swh.loader.git.loader.GitLoader.load")
     mock_loader.return_value = {"status": "eventful"}
 
     res = swh_scheduler_celery_app.send_task(
         "swh.loader.git.tasks.UpdateGitRepository",
-        kwargs={
-            "url": "origin_url",
-        },
+        kwargs={"url": "origin_url"},
+    )
+    assert res
+    res.wait()
+    assert res.successful()
+
+    assert res.result == {"status": "eventful"}
+    mock_loader.assert_called_once_with()
+
+
+def test_git_loader_for_listed_origin(
+    mocker,
+    swh_scheduler_celery_app,
+    git_lister,
+    git_listed_origin,
+):
+    mock_loader = mocker.patch("swh.loader.git.loader.GitLoader.load")
+    mock_loader.return_value = {"status": "eventful"}
+
+    task_dict = create_origin_task_dict(git_listed_origin, git_lister)
+
+    res = swh_scheduler_celery_app.send_task(
+        "swh.loader.git.tasks.UpdateGitRepository",
+        kwargs=task_dict["arguments"]["kwargs"],
     )
     assert res
     res.wait()
@@ -25,18 +71,41 @@
 
 
 def test_git_loader_from_disk(
-    mocker, swh_config, swh_scheduler_celery_app, swh_scheduler_celery_worker
+    mocker,
+    swh_scheduler_celery_app,
 ):
     mock_loader = mocker.patch("swh.loader.git.from_disk.GitLoaderFromDisk.load")
     mock_loader.return_value = {"status": "uneventful"}
 
     res = swh_scheduler_celery_app.send_task(
         "swh.loader.git.tasks.LoadDiskGitRepository",
-        kwargs={
-            "url": "origin_url2",
-            "directory": "/some/repo",
-            "date": "2018-12-10 00:00",
-        },
+        kwargs={"url": "origin_url2", "directory": "/some/repo", "visit_date": "now"},
+    )
+    assert res
+    res.wait()
+    assert res.successful()
+
+    assert res.result == {"status": "uneventful"}
+    mock_loader.assert_called_once_with()
+
+
+def test_git_loader_from_disk_for_listed_origin(
+    mocker,
+    swh_scheduler_celery_app,
+    git_lister,
+    git_listed_origin,
+):
+    mock_loader = mocker.patch("swh.loader.git.from_disk.GitLoaderFromDisk.load")
+    mock_loader.return_value = {"status": "uneventful"}
+
+    git_listed_origin.extra_loader_arguments = {
+        "directory": "/some/repo",
+    }
+    task_dict = create_origin_task_dict(git_listed_origin, git_lister)
+
+    res = swh_scheduler_celery_app.send_task(
+        "swh.loader.git.tasks.LoadDiskGitRepository",
+        kwargs=task_dict["arguments"]["kwargs"],
     )
     assert res
     res.wait()
@@ -47,10 +116,10 @@
 
 
 def test_git_loader_from_archive(
-    mocker, swh_config, swh_scheduler_celery_app, swh_scheduler_celery_worker
+    mocker,
+    swh_scheduler_celery_app,
 ):
     mock_loader = mocker.patch("swh.loader.git.from_disk.GitLoaderFromArchive.load")
-
     mock_loader.return_value = {"status": "failed"}
 
     res = swh_scheduler_celery_app.send_task(
@@ -58,7 +127,7 @@
         kwargs={
             "url": "origin_url3",
             "archive_path": "/some/repo",
-            "date": "2017-01-10 00:00",
+            "visit_date": "now",
         },
     )
     assert res
@@ -67,3 +136,29 @@
 
     assert res.result == {"status": "failed"}
     mock_loader.assert_called_once_with()
+
+
+def test_git_loader_from_archive_for_listed_origin(
+    mocker,
+    swh_scheduler_celery_app,
+    git_lister,
+    git_listed_origin,
+):
+    mock_loader = mocker.patch("swh.loader.git.from_disk.GitLoaderFromArchive.load")
+    mock_loader.return_value = {"status": "failed"}
+
+    git_listed_origin.extra_loader_arguments = {
+        "archive_path": "/some/repo",
+    }
+    task_dict = create_origin_task_dict(git_listed_origin, git_lister)
+
+    res = swh_scheduler_celery_app.send_task(
+        "swh.loader.git.tasks.UncompressAndLoadDiskGitRepository",
+        kwargs=task_dict["arguments"]["kwargs"],
+    )
+    assert res
+    res.wait()
+    assert res.successful()
+
+    assert res.result == {"status": "failed"}
+    mock_loader.assert_called_once_with()