diff --git a/sql/updates/21.sql b/sql/updates/21.sql
new file mode 100644
--- /dev/null
+++ b/sql/updates/21.sql
@@ -0,0 +1,6 @@
+insert into dbversion (version, release, description)
+       values (21, now(), 'Work In Progress');
+
+-- Mirror 30-schema.sql so upgraded and freshly created databases match.
+alter table origin_visit_stats add column last_snapshot bytea;
+comment on column origin_visit_stats.last_snapshot is 'Last visit snapshot';
diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py
--- a/swh/scheduler/backend.py
+++ b/swh/scheduler/backend.py
@@ -769,9 +769,10 @@
                 visit_type,
                 last_eventful,
                 last_uneventful,
-                last_failed
+                last_failed,
+                last_snapshot
             )
-            VALUES (%s, %s, %s, %s, %s) ON CONFLICT (url, visit_type) DO
+            VALUES (%s, %s, %s, %s, %s, %s) ON CONFLICT (url, visit_type) DO
             UPDATE SET
                 last_eventful = coalesce(
                     excluded.last_eventful,
@@ -784,6 +785,10 @@
                 last_failed = coalesce(
                     excluded.last_failed,
                     ovi.last_failed
+                ),
+                last_snapshot = coalesce(
+                    excluded.last_snapshot,
+                    ovi.last_snapshot
                 )
         """
 
@@ -795,6 +800,7 @@
                 visit_stats.last_eventful,
                 visit_stats.last_uneventful,
                 visit_stats.last_failed,
+                visit_stats.last_snapshot,
             ),
         )
 
diff --git a/swh/scheduler/journal_client.py b/swh/scheduler/journal_client.py
--- a/swh/scheduler/journal_client.py
+++ b/swh/scheduler/journal_client.py
@@ -27,6 +27,7 @@
             "last_uneventful": None,
             "last_eventful": None,
             "last_failed": None,
+            "last_snapshot": ovs_dict["snapshot"],
         }
 
         # partial, snapshot -> eventful
diff --git a/swh/scheduler/model.py b/swh/scheduler/model.py
--- a/swh/scheduler/model.py
+++ b/swh/scheduler/model.py
@@ -221,6 +221,9 @@
         type=Optional[datetime.datetime], validator=type_validator()
     )
     last_failed = attr.ib(type=Optional[datetime.datetime], validator=type_validator())
+    last_snapshot = attr.ib(
+        type=Optional[bytes], validator=type_validator(), default=None
+    )
 
     @last_eventful.validator
     def check_last_eventful(self, attribute, value):
diff --git a/swh/scheduler/sql/30-schema.sql b/swh/scheduler/sql/30-schema.sql
--- a/swh/scheduler/sql/30-schema.sql
+++ b/swh/scheduler/sql/30-schema.sql
@@ -11,7 +11,7 @@
 comment on column dbversion.description is 'Version description';
 
 insert into dbversion (version, release, description)
-       values (19, now(), 'Work In Progress');
+       values (21, now(), 'Work In Progress');
 
 create table task_type (
   type text primary key,
@@ -171,6 +171,7 @@
   last_eventful timestamptz,
   last_uneventful timestamptz,
   last_failed timestamptz,
+  last_snapshot bytea,
 
   primary key (url, visit_type)
 );
@@ -180,3 +181,4 @@
 comment on column origin_visit_stats.last_eventful is 'Date of the last eventful event';
 comment on column origin_visit_stats.last_uneventful is 'Date of the last uneventful event';
 comment on column origin_visit_stats.last_failed is 'Date of the last failed event';
+comment on column origin_visit_stats.last_snapshot is 'Last visit snapshot';
diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py
--- a/swh/scheduler/tests/test_scheduler.py
+++ b/swh/scheduler/tests/test_scheduler.py
@@ -14,6 +14,7 @@
 import attr
 import pytest
 
+from swh.model.hashutil import hash_to_bytes
 from swh.scheduler.exc import StaleData, UnknownPolicy
 from swh.scheduler.interface import SchedulerInterface
 from swh.scheduler.model import ListedOrigin, ListedOriginPageToken, OriginVisitStats
@@ -823,3 +824,21 @@
         )
 
         assert failed_visit == expected_visit_stats
+
+    def test_origin_visit_stats_upsert_with_snapshot(self, swh_scheduler) -> None:
+        """Stats upserted with a last_snapshot are returned unchanged by get."""
+        eventful_date = utcnow()
+        url = "https://github.com/666/test"
+
+        visit_stats = OriginVisitStats(
+            url=url,
+            visit_type="git",
+            last_eventful=eventful_date,
+            last_uneventful=None,
+            last_failed=None,
+            last_snapshot=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
+        )
+        swh_scheduler.origin_visit_stats_upsert(visit_stats)
+
+        assert swh_scheduler.origin_visit_stats_get(url, "git") == visit_stats
+        assert swh_scheduler.origin_visit_stats_get(url, "svn") is None