Changeset View
Standalone View
swh/loader/core/loader.py
# Copyright (C) 2015-2021 The Software Heritage developers | # Copyright (C) 2015-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from abc import ABCMeta, abstractmethod | from abc import ABCMeta, abstractmethod | ||||
import datetime | import datetime | ||||
import hashlib | import hashlib | ||||
import logging | import logging | ||||
import os | import os | ||||
from typing import Any, Dict, Iterable, Optional | from typing import Any, Dict, Iterable, Optional | ||||
from swh.core.config import load_from_envvar | from swh.core.config import load_from_envvar | ||||
from swh.loader.exception import NotFound | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
BaseContent, | BaseContent, | ||||
Content, | Content, | ||||
Directory, | Directory, | ||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Release, | Release, | ||||
▲ Show 20 Lines • Show All 162 Lines • ▼ Show 20 Lines | def _store_origin_visit(self) -> None: | ||||
) | ) | ||||
)[0] | )[0] | ||||
@abstractmethod | @abstractmethod | ||||
def prepare(self, *args, **kwargs) -> None: | def prepare(self, *args, **kwargs) -> None: | ||||
"""Second step executed by the loader to prepare some state needed by | """Second step executed by the loader to prepare some state needed by | ||||
the loader. | the loader. | ||||
Raises | |||||
NotFound exception if the origin to ingest is not found. | |||||
""" | """ | ||||
pass | pass | ||||
def get_origin(self) -> Origin: | def get_origin(self) -> Origin: | ||||
"""Get the origin that is currently being loaded. | """Get the origin that is currently being loaded. | ||||
self.origin should be set in :func:`prepare_origin` | self.origin should be set in :func:`prepare_origin` | ||||
Returns: | Returns: | ||||
▲ Show 20 Lines • Show All 125 Lines • ▼ Show 20 Lines | def load(self, *args, **kwargs) -> Dict[str, str]: | ||||
origin=self.origin.url, | origin=self.origin.url, | ||||
visit=self.visit.visit, | visit=self.visit.visit, | ||||
type=self.visit_type, | type=self.visit_type, | ||||
date=now(), | date=now(), | ||||
status=self.visit_status(), | status=self.visit_status(), | ||||
snapshot=self.loaded_snapshot_id, | snapshot=self.loaded_snapshot_id, | ||||
) | ) | ||||
self.storage.origin_visit_status_add([visit_status]) | self.storage.origin_visit_status_add([visit_status]) | ||||
self.post_load() | self.post_load() | ||||
except Exception: | except Exception as e: | ||||
if isinstance(e, NotFound): | |||||
status = "not_found" | |||||
task_status = "uneventful" | |||||
else: | |||||
status = "partial" if self.loaded_snapshot_id else "failed" | status = "partial" if self.loaded_snapshot_id else "failed" | ||||
task_status = "failed" | |||||
self.log.exception( | self.log.exception( | ||||
"Loading failure, updating to `%s` status", | "Loading failure, updating to `%s` status", | ||||
status, | status, | ||||
extra={"swh_task_args": args, "swh_task_kwargs": kwargs,}, | extra={"swh_task_args": args, "swh_task_kwargs": kwargs,}, | ||||
) | ) | ||||
visit_status = OriginVisitStatus( | visit_status = OriginVisitStatus( | ||||
anlambert: I think you should return `{"status": "failed"}` after that line. or maybe I am missing… | |||||
Done Inline ActionsI'm not totally sure about this. That status is about the task and not the visit. The thing is, i think with status "failed", the scheduler will schedule it back and ardumont: I'm not totally sure about this. That status is about the task and not the visit.
Currently… | |||||
Not Done Inline ActionsAh right, I got mistaken by the test that overrides load_status. anlambert: Ah right, I got mistaken by the test that overrides `load_status`. | |||||
origin=self.origin.url, | origin=self.origin.url, | ||||
visit=self.visit.visit, | visit=self.visit.visit, | ||||
type=self.visit_type, | type=self.visit_type, | ||||
date=now(), | date=now(), | ||||
status=status, | status=status, | ||||
snapshot=self.loaded_snapshot_id, | snapshot=self.loaded_snapshot_id, | ||||
) | ) | ||||
self.storage.origin_visit_status_add([visit_status]) | self.storage.origin_visit_status_add([visit_status]) | ||||
self.post_load(success=False) | self.post_load(success=False) | ||||
return {"status": "failed"} | return {"status": task_status} | ||||
finally: | finally: | ||||
self.flush() | self.flush() | ||||
self.cleanup() | self.cleanup() | ||||
return self.load_status() | return self.load_status() | ||||
class DVCSLoader(BaseLoader): | class DVCSLoader(BaseLoader): | ||||
Not Done Inline ActionsThese two exception code blocks are quite similar, you could merge them into one and test on the exception type to modify status and log message. anlambert: These two exception code blocks are quite similar, you could merge them into one and test on… | |||||
Done Inline Actionsit crossed my mind. I did not act on it as i'm always wondering whether that's within the scope of that diff or with another one about refactoring ¯\_(ツ)_/¯ ardumont: it crossed my mind.
I did not act on it as i'm always wondering whether that's within the… | |||||
Not Done Inline ActionsI think yo can do it in that diff, you just have to modify the original Exception block and change statuses if the exception type is NotFound, anlambert: I think yo can do it in that diff, you just have to modify the original Exception block and… | |||||
"""This base class is a pattern for dvcs loaders (e.g. git, mercurial). | """This base class is a pattern for dvcs loaders (e.g. git, mercurial). | ||||
Those loaders are able to load all the data in one go. For example, the | Those loaders are able to load all the data in one go. For example, the | ||||
loader defined in swh-loader-git :class:`BulkUpdater`. | loader defined in swh-loader-git :class:`BulkUpdater`. | ||||
For other loaders (stateful one, (e.g :class:`SWHSvnLoader`), | For other loaders (stateful one, (e.g :class:`SWHSvnLoader`), | ||||
inherit directly from :class:`BaseLoader`. | inherit directly from :class:`BaseLoader`. | ||||
▲ Show 20 Lines • Show All 72 Lines • Show Last 20 Lines |
I think you should return {"status": "failed"} after that line. or maybe I am missing something here ?