Changeset View
Standalone View
swh/web/common/management/commands/refresh_savecodenow_statuses.py
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.core.management.base import BaseCommand | from django.core.management.base import BaseCommand | ||||
from swh.scheduler.model import ListedOrigin | |||||
from swh.web.common.origin_save import refresh_save_origin_request_statuses | from swh.web.common.origin_save import refresh_save_origin_request_statuses | ||||
from swh.web.config import scheduler as get_scheduler | |||||
class Command(BaseCommand):
    """Management command refreshing the statuses of save code now requests."""

    help = "Refresh save code now origin request statuses periodically"

    def handle(self, *args, **options):
        """Refresh origin save code now requests.

        For the origin visit types, svn, git, hg, this also installs the origins as
        recurring origins to visit.

        Args:
            *args: positional arguments forwarded by Django (unused).
            **options: parsed command-line options forwarded by Django (unused).
        """
        refreshed_statuses = refresh_save_origin_request_statuses()
        scheduler = get_scheduler()

        # then schedule the origins with meaningful status and type to be ingested
        # regularly
        # FIXME: retrieve the archive's instance name instead of the hard-coded
        # "host" value.
        lister = scheduler.get_or_create_lister(
            name="save-code-now", instance_name="host"
        )

        # Several save code now requests can be submitted for the same origin URL,
        # so the refreshed statuses may reference one origin more than once.
        # Per review, recording such duplicates leads to errors when the scheduler
        # inserts them, hence each (visit_type, url) pair is kept only once.
        seen_origins = set()
        listed_origins = []
        for status in refreshed_statuses:
            visit_type = status["visit_type"]
            if visit_type == "archives":  # only deal with git, svn, hg
                continue
            # only successfully (even partially) ingested origins are worth
            # scheduling for recurring visits
            if status["visit_status"] not in ("partial", "full"):
                continue
            origin_url = status["origin_url"]
            origin_key = (visit_type, origin_url)
            if origin_key in seen_origins:
                continue
            seen_origins.add(origin_key)
            # NOTE: last_update is intentionally left unset; per review, setting
            # it would prevent the origin from being visited again until that
            # field gets updated, which only a real forge lister does.
            listed_origins.append(
                ListedOrigin(lister_id=lister.id, visit_type=visit_type, url=origin_url)
            )

        if listed_origins:
            scheduler.record_listed_origins(listed_origins)

        if len(refreshed_statuses) > 0:
            msg = f"Successfully updated {len(refreshed_statuses)} save request(s)."
        else:
            msg = "Nothing to do."

        self.stdout.write(self.style.SUCCESS(msg))
^ fixme: find the archive's instance name instead of host