Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/svn/tasks.py
| # Copyright (C) 2015-2021 The Software Heritage developers | # Copyright (C) 2015-2022 The Software Heritage developers | ||||
| # See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
| # License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
| # See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
| from datetime import datetime | |||||
| from typing import Optional | |||||
| from celery import shared_task | from celery import shared_task | ||||
| import iso8601 | |||||
| from .loader import SvnLoader, SvnLoaderFromDumpArchive, SvnLoaderFromRemoteDump | from .loader import SvnLoader, SvnLoaderFromDumpArchive, SvnLoaderFromRemoteDump | ||||
| def convert_to_datetime(date: Optional[str]) -> Optional[datetime]: | |||||
| if date is None: | |||||
| return None | |||||
| try: | |||||
| assert isinstance(date, str) | |||||
| return iso8601.parse_date(date) | |||||
| except Exception: | |||||
| return None | |||||
| @shared_task(name=__name__ + ".LoadSvnRepository") | @shared_task(name=__name__ + ".LoadSvnRepository") | ||||
| def load_svn( | def load_svn(*args, **kwargs): | ||||
| *, | """Import a svn repository""" | ||||
| url: Optional[str] = None, | loader = SvnLoader.from_configfile(*args, **kwargs) | ||||
| origin_url: Optional[str] = None, | |||||
| visit_date: Optional[str] = None, | |||||
| incremental: Optional[bool] = True, | |||||
| ): | |||||
| """Import a svn repository | |||||
| Args: | |||||
| url: (mandatory) svn's repository url to ingest data from | |||||
| origin_url: Optional original url override to use as origin reference in the | |||||
| archive. If not provided, "url" is used as origin. | |||||
| visit_date: Optional date to override the visit date | |||||
| incremental: If True, the default, starts from the last snapshot (if any). | |||||
| Otherwise, starts from the initial commit of the repository. | |||||
| """ | |||||
| loader = SvnLoader.from_configfile( | |||||
| url=url, | |||||
| origin_url=origin_url, | |||||
| visit_date=convert_to_datetime(visit_date), | |||||
| incremental=incremental, | |||||
| ) | |||||
| return loader.load() | return loader.load() | ||||
| @shared_task(name=__name__ + ".MountAndLoadSvnRepository") | @shared_task(name=__name__ + ".MountAndLoadSvnRepository") | ||||
| def load_svn_from_archive( | def load_svn_from_archive(*args, **kwargs): | ||||
| *, | """ | ||||
| url: Optional[str] = None, | 1. Mount an svn dump from archive as a local svn repository | ||||
| archive_path: Optional[str] = None, | |||||
| visit_date: Optional[str] = None, | |||||
| incremental: Optional[bool] = True, | |||||
| ): | |||||
| """1. Mount an svn dump from archive as a local svn repository | |||||
| 2. Load it through the svn loader | 2. Load it through the svn loader | ||||
| 3. Clean up mounted svn repository archive | 3. Clean up mounted svn repository archive | ||||
| Args: | |||||
| url: origin url | |||||
| archive_path: Path on disk to the archive holdin the svn repository to ingest | |||||
| visit_date: Optional date to override the visit date | |||||
| incremental: If True, the default, starts from the last snapshot (if any). | |||||
| Otherwise, starts from the initial commit of the repository. | |||||
| """ | """ | ||||
| loader = SvnLoaderFromDumpArchive.from_configfile( | loader = SvnLoaderFromDumpArchive.from_configfile(*args, **kwargs) | ||||
| url=url, | |||||
| archive_path=archive_path, | |||||
| visit_date=convert_to_datetime(visit_date), | |||||
| incremental=incremental, | |||||
| ) | |||||
| return loader.load() | return loader.load() | ||||
| @shared_task(name=__name__ + ".DumpMountAndLoadSvnRepository") | @shared_task(name=__name__ + ".DumpMountAndLoadSvnRepository") | ||||
| def load_svn_from_remote_dump( | def load_svn_from_remote_dump(*args, **kwargs): | ||||
| *, | """ | ||||
| url: Optional[str] = None, | 1. Mount a remote svn dump as a local svn repository. | ||||
| origin_url: Optional[str] = None, | |||||
| visit_date: Optional[str] = None, | |||||
| incremental: Optional[bool] = True, | |||||
| ): | |||||
| """1. Mount a remote svn dump as a local svn repository. | |||||
| 2. Load it through the svn loader. | 2. Load it through the svn loader. | ||||
| 3. Clean up mounted svn repository archive. | 3. Clean up mounted svn repository archive. | ||||
| Args: | |||||
| url: (mandatory) svn's repository url to ingest data from | |||||
| origin_url: Optional original url override to use as origin reference | |||||
| in the archive. If not provided, "url" is used as origin. | |||||
| visit_date: Optional date to override the visit date | |||||
| incremental: If True, the default, starts from the last snapshot (if any). | |||||
| Otherwise, starts from the initial commit of the repository. | |||||
| """ | """ | ||||
| loader = SvnLoaderFromRemoteDump.from_configfile( | loader = SvnLoaderFromRemoteDump.from_configfile(*args, **kwargs) | ||||
| url=url, | |||||
| origin_url=origin_url, | |||||
| visit_date=convert_to_datetime(visit_date), | |||||
| incremental=incremental, | |||||
| ) | |||||
| return loader.load() | return loader.load() | ||||