Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/git/tasks.py
# Copyright (C) 2015-2019 The Software Heritage developers | # Copyright (C) 2015-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import dateutil.parser | import dateutil.parser | ||||
from typing import Any, Dict, Optional | |||||
from celery import shared_task | from celery import shared_task | ||||
from swh.loader.git.from_disk import GitLoaderFromDisk, GitLoaderFromArchive | from swh.loader.git.from_disk import GitLoaderFromDisk, GitLoaderFromArchive | ||||
from swh.loader.git.loader import GitLoader | from swh.loader.git.loader import GitLoader | ||||
@shared_task(name=__name__ + '.UpdateGitRepository') | @shared_task(name=__name__ + '.UpdateGitRepository') | ||||
def load_git(repo_url, base_url=None): | def load_git(url: str, base_url: Optional[str] = None) -> Dict[str, Any]: | ||||
"""Import a git repository from a remote location""" | """Import a git repository from a remote location | ||||
loader = GitLoader(repo_url, base_url=base_url) | |||||
""" | |||||
loader = GitLoader(url, base_url=base_url) | |||||
anlambert: How about dropping the keyword-argument-only constraint?
By using the following signature… | |||||
Done Inline Actions
Yes, ok. Thanks for the feedback.
I feel like we will need to do so eventually. ardumont: Yes, ok.
Thanks for the feedback.
> This feels simpler to me than migrating task arguments… | |||||
return loader.load() | return loader.load() | ||||
@shared_task(name=__name__ + '.LoadDiskGitRepository') | @shared_task(name=__name__ + '.LoadDiskGitRepository') | ||||
def load_git_from_dir(origin_url, directory, date): | def load_git_from_dir(url: str, directory: str, date: str) -> Dict[str, Any]: | ||||
"""Import a git repository from a local repository | """Import a git repository from a local repository | ||||
Import a git repository, cloned in `directory` from `origin_url` at | Import a git repository, cloned in `directory` from `origin_url` at | ||||
`date`. | `date`. | ||||
""" | """ | ||||
visit_date = dateutil.parser.parse(date) | visit_date = dateutil.parser.parse(date) | ||||
loader = GitLoaderFromDisk( | loader = GitLoaderFromDisk( | ||||
origin_url, directory=directory, visit_date=visit_date) | url, directory=directory, visit_date=visit_date) | ||||
return loader.load() | return loader.load() | ||||
@shared_task(name=__name__ + '.UncompressAndLoadDiskGitRepository') | @shared_task(name=__name__ + '.UncompressAndLoadDiskGitRepository') | ||||
def load_git_from_zip(origin_url, archive_path, date): | def load_git_from_zip( | ||||
url: str, archive_path: str, date: str) -> Dict[str, Any]: | |||||
"""Import a git repository from a zip archive | """Import a git repository from a zip archive | ||||
1. Uncompress an archive repository in a local and temporary folder | 1. Uncompress an archive repository in a local and temporary folder | ||||
2. Load it through the git disk loader | 2. Load it through the git disk loader | ||||
3. Clean up the temporary folder | 3. Clean up the temporary folder | ||||
""" | """ | ||||
visit_date = dateutil.parser.parse(date) | visit_date = dateutil.parser.parse(date) | ||||
loader = GitLoaderFromArchive( | loader = GitLoaderFromArchive( | ||||
origin_url, archive_path=archive_path, visit_date=visit_date) | url, archive_path=archive_path, visit_date=visit_date) | ||||
return loader.load() | return loader.load() |
How about dropping the keyword-argument-only constraint?
By using the following signature:
you maintain backward compatibility with the recurring tasks already registered
in the scheduler database, while still allowing `url` to be passed as a keyword
argument.
The same thing should be done in the other loaders (for instance, there are recurring
mercurial loader tasks in the scheduler database).
This feels simpler to me than migrating the task argument format directly in the database.