Changeset View
Standalone View
swh/loader/core/loader.py
# Copyright (C) 2015-2021 The Software Heritage developers | # Copyright (C) 2015-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from abc import ABCMeta, abstractmethod | from abc import ABCMeta, abstractmethod | ||||
import datetime | import datetime | ||||
import hashlib | import hashlib | ||||
import logging | import logging | ||||
import os | import os | ||||
from typing import Any, Dict, Iterable, Optional | from typing import Any, Dict, Iterable, Optional | ||||
from swh.core.config import load_from_envvar | from swh.core.config import load_from_envvar | ||||
from swh.loader.exception import NotFound | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
BaseContent, | BaseContent, | ||||
Content, | Content, | ||||
Directory, | Directory, | ||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Release, | Release, | ||||
▲ Show 20 Lines • Show All 303 Lines • ▼ Show 20 Lines | def load(self, *args, **kwargs) -> Dict[str, str]: | ||||
origin=self.origin.url, | origin=self.origin.url, | ||||
visit=self.visit.visit, | visit=self.visit.visit, | ||||
type=self.visit_type, | type=self.visit_type, | ||||
date=now(), | date=now(), | ||||
status=self.visit_status(), | status=self.visit_status(), | ||||
snapshot=self.loaded_snapshot_id, | snapshot=self.loaded_snapshot_id, | ||||
) | ) | ||||
self.storage.origin_visit_status_add([visit_status]) | self.storage.origin_visit_status_add([visit_status]) | ||||
self.post_load() | self.post_load() | ||||
except NotFound: | |||||
self.log.exception( | |||||
"Loading failure, updating to `not_found` status", | |||||
extra={"swh_task_args": args, "swh_task_kwargs": kwargs,}, | |||||
) | |||||
visit_status = OriginVisitStatus( | |||||
origin=self.origin.url, | |||||
visit=self.visit.visit, | |||||
type=self.visit_type, | |||||
date=now(), | |||||
status="not_found", | |||||
snapshot=None, | |||||
) | |||||
self.storage.origin_visit_status_add([visit_status]) | |||||
anlambert: I think you should return `{"status": "failed"}` after that line. or maybe I am missing… | |||||
Done Inline ActionsI'm not totally sure about this. That status is about the task and not the visit. The thing is, i think with status "failed", the scheduler will schedule it back and ardumont: I'm not totally sure about this. That status is about the task and not the visit.
Currently… | |||||
Not Done Inline ActionsAh right, I got mistaken by the test that overrides load_status. anlambert: Ah right, I got mistaken by the test that overrides `load_status`. | |||||
self.post_load(success=False) | |||||
except Exception: | except Exception: | ||||
status = "partial" if self.loaded_snapshot_id else "failed" | status = "partial" if self.loaded_snapshot_id else "failed" | ||||
self.log.exception( | self.log.exception( | ||||
"Loading failure, updating to `%s` status", | "Loading failure, updating to `%s` status", | ||||
status, | status, | ||||
extra={"swh_task_args": args, "swh_task_kwargs": kwargs,}, | extra={"swh_task_args": args, "swh_task_kwargs": kwargs,}, | ||||
) | ) | ||||
visit_status = OriginVisitStatus( | visit_status = OriginVisitStatus( | ||||
origin=self.origin.url, | origin=self.origin.url, | ||||
visit=self.visit.visit, | visit=self.visit.visit, | ||||
type=self.visit_type, | type=self.visit_type, | ||||
date=now(), | date=now(), | ||||
status=status, | status=status, | ||||
snapshot=self.loaded_snapshot_id, | snapshot=self.loaded_snapshot_id, | ||||
) | ) | ||||
self.storage.origin_visit_status_add([visit_status]) | self.storage.origin_visit_status_add([visit_status]) | ||||
self.post_load(success=False) | self.post_load(success=False) | ||||
Not Done Inline ActionsThese two exception code blocks are quite similar, you could merge them into one and test on the exception type to modify status and log message. anlambert: These two exception code blocks are quite similar, you could merge them into one and test on… | |||||
Done Inline Actionsit crossed my mind. I did not act on it as i'm always wondering whether that's within the scope of that diff or with another one about refactoring ¯\_(ツ)_/¯ ardumont: it crossed my mind.
I did not act on it as i'm always wondering whether that's within the… | |||||
Not Done Inline ActionsI think yo can do it in that diff, you just have to modify the original Exception block and change statuses if the exception type is NotFound, anlambert: I think yo can do it in that diff, you just have to modify the original Exception block and… | |||||
return {"status": "failed"} | return {"status": "failed"} | ||||
finally: | finally: | ||||
self.flush() | self.flush() | ||||
self.cleanup() | self.cleanup() | ||||
return self.load_status() | return self.load_status() | ||||
▲ Show 20 Lines • Show All 81 Lines • Show Last 20 Lines |
I think you should return {"status": "failed"} after that line. or maybe I am missing something here ?