Changeset View
Changeset View
Standalone View
Standalone View
swh/scheduler/journal_client.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import copy | |||||
from datetime import datetime, timedelta | from datetime import datetime, timedelta | ||||
import random | import random | ||||
from typing import Dict, List, Optional, Tuple | from typing import Dict, List, Optional, Tuple | ||||
import attr | import attr | ||||
from swh.scheduler.interface import SchedulerInterface | from swh.scheduler.interface import SchedulerInterface | ||||
from swh.scheduler.model import LastVisitStatus, OriginVisitStats | from swh.scheduler.model import LastVisitStatus, OriginVisitStats | ||||
▲ Show 20 Lines • Show All 180 Lines • ▼ Show 20 Lines | if not interesting_messages: | ||||
return | return | ||||
origin_visit_stats: Dict[Tuple[str, str], Dict] = { | origin_visit_stats: Dict[Tuple[str, str], Dict] = { | ||||
(visit_stats.url, visit_stats.visit_type): attr.asdict(visit_stats) | (visit_stats.url, visit_stats.visit_type): attr.asdict(visit_stats) | ||||
for visit_stats in scheduler.origin_visit_stats_get( | for visit_stats in scheduler.origin_visit_stats_get( | ||||
list(set((vs["origin"], vs["type"]) for vs in interesting_messages)) | list(set((vs["origin"], vs["type"]) for vs in interesting_messages)) | ||||
) | ) | ||||
} | } | ||||
existing_origin_visit_stats = copy.deepcopy(origin_visit_stats) | |||||
# Use the default values from the model object | # Use the default values from the model object | ||||
empty_object = { | empty_object = { | ||||
field.name: field.default if field.default != attr.NOTHING else None | field.name: field.default if field.default != attr.NOTHING else None | ||||
for field in attr.fields(OriginVisitStats) | for field in attr.fields(OriginVisitStats) | ||||
} | } | ||||
disabled_urls: List[str] = [] | disabled_urls: List[str] = [] | ||||
▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines | for msg_dict in interesting_messages: | ||||
# Disable recurring failing/not-found origins | # Disable recurring failing/not-found origins | ||||
if ( | if ( | ||||
visit_stats_d["last_visit_status"] | visit_stats_d["last_visit_status"] | ||||
in [LastVisitStatus.not_found, LastVisitStatus.failed] | in [LastVisitStatus.not_found, LastVisitStatus.failed] | ||||
) and visit_stats_d["successive_visits"] >= DISABLE_ORIGIN_THRESHOLD: | ) and visit_stats_d["successive_visits"] >= DISABLE_ORIGIN_THRESHOLD: | ||||
disabled_urls.append(visit_stats_d["url"]) | disabled_urls.append(visit_stats_d["url"]) | ||||
scheduler.origin_visit_stats_upsert( | # Only upsert changed values | ||||
OriginVisitStats(**ovs) for ovs in origin_visit_stats.values() | to_upsert = [] | ||||
) | for key, ovs in origin_visit_stats.items(): | ||||
if ( | |||||
key not in existing_origin_visit_stats | |||||
or ovs != existing_origin_visit_stats[key] | |||||
): | |||||
to_upsert.append(OriginVisitStats(**ovs)) | |||||
if to_upsert: | |||||
scheduler.origin_visit_stats_upsert(to_upsert) | |||||
# Disable any origins if any | # Disable any origins if any | ||||
if disabled_urls: | if disabled_urls: | ||||
disabled_origins = [] | disabled_origins = [] | ||||
for url in disabled_urls: | for url in disabled_urls: | ||||
origins = scheduler.get_listed_origins(url=url).results | origins = scheduler.get_listed_origins(url=url).results | ||||
if len(origins) > 0: | if len(origins) > 0: | ||||
origin = attr.evolve(origins[0], enabled=False) | origin = attr.evolve(origins[0], enabled=False) | ||||
disabled_origins.append(origin) | disabled_origins.append(origin) | ||||
if disabled_origins: | if disabled_origins: | ||||
scheduler.record_listed_origins(disabled_origins) | scheduler.record_listed_origins(disabled_origins) |