diff --git a/sql/updates/19.sql b/sql/updates/19.sql
new file mode 100644
--- /dev/null
+++ b/sql/updates/19.sql
@@ -0,0 +1,18 @@
+insert into dbversion (version, release, description)
+       values (19, now(), 'Work In Progress');
+
+create table origin_visit_info (
+  url text not null,
+  visit_type text not null,
+  eventful timestamptz,
+  uneventful timestamptz,
+  failed timestamptz,
+
+  primary key (url, visit_type)
+);
+
+comment on column origin_visit_info.url is 'Origin URL';
+comment on column origin_visit_info.visit_type is 'Type of the visit for the given url';
+comment on column origin_visit_info.eventful is 'Date of the last eventful event';
+comment on column origin_visit_info.uneventful is 'Date of the last uneventful event';
+comment on column origin_visit_info.failed is 'Date of the last failed event';
diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py
--- a/swh/scheduler/backend.py
+++ b/swh/scheduler/backend.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2020 The Software Heritage developers
+# Copyright (C) 2015-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -21,6 +21,7 @@
     ListedOrigin,
     ListedOriginPageToken,
     Lister,
+    OriginVisitInfo,
     PaginatedListedOriginList,
 )
 
@@ -725,3 +726,69 @@
     def get_priority_ratios(self, db=None, cur=None):
         cur.execute("select id, ratio from priority_ratio")
         return {row["id"]: row["ratio"] for row in cur.fetchall()}
+
+    @db_transaction()
+    def origin_visit_info_upsert(
+        self, visit_info: OriginVisitInfo, db=None, cur=None
+    ) -> None:
+        """Insert or update the visit information of an (url, visit_type) pair.
+
+        Dates set to None in ``visit_info`` leave the stored dates untouched.
+        Otherwise the most recent of the stored and the submitted date is kept,
+        so that recording an older visit late cannot move a "last event" date
+        backwards.
+
+        Args:
+            visit_info: dates of the last eventful, uneventful and failed
+              events to record for the origin and visit type.
+
+        """
+        # greatest() ignores NULL arguments in PostgreSQL: the result is only
+        # NULL when both the stored and the submitted dates are NULL.
+        query = """
+            INSERT into origin_visit_info as ovi
+              (url, visit_type, eventful, uneventful, failed)
+            VALUES (%s, %s, %s, %s, %s)
+            ON CONFLICT (url, visit_type) DO UPDATE
+              SET eventful = greatest(excluded.eventful, ovi.eventful),
+                  uneventful = greatest(excluded.uneventful, ovi.uneventful),
+                  failed = greatest(excluded.failed, ovi.failed)
+        """
+
+        cur.execute(
+            query,
+            (
+                visit_info.url,
+                visit_info.visit_type,
+                visit_info.eventful,
+                visit_info.uneventful,
+                visit_info.failed,
+            ),
+        )
+
+    @db_transaction()
+    def origin_visit_info_get(
+        self, url: str, visit_type: str, db=None, cur=None
+    ) -> Optional[OriginVisitInfo]:
+        """Retrieve the visit information recorded for an origin.
+
+        Args:
+            url: origin URL
+            visit_type: type of visit to look up for that origin
+
+        Returns:
+            The matching OriginVisitInfo, or None when nothing is recorded
+            for this (url, visit_type) pair.
+
+        """
+        query = format_query(
+            "select {keys} from origin_visit_info where url=%s and visit_type=%s",
+            OriginVisitInfo.select_columns(),
+        )
+        cur.execute(query, (url, visit_type))
+        row = cur.fetchone()
+
+        if row:
+            return OriginVisitInfo(**row)
+        else:
+            return None
diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py
--- a/swh/scheduler/interface.py
+++ b/swh/scheduler/interface.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2020 The Software Heritage developers
+# Copyright (C) 2015-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -13,6 +13,7 @@
     ListedOrigin,
     ListedOriginPageToken,
     Lister,
+    OriginVisitInfo,
     PaginatedListedOriginList,
 )
 
@@ -312,3 +313,20 @@
     @remote_api_endpoint("priority_ratios/get")
     def get_priority_ratios(self):
         ...
+
+    @remote_api_endpoint("visit_info/upsert")
+    def origin_visit_info_upsert(self, visit_info: OriginVisitInfo) -> None:
+        """Create the visit info of an (url, visit_type) pair, or update the
+        existing one.
+
+        Dates set to None in visit_info leave the recorded dates unchanged.
+        """
+        ...
+
+    @remote_api_endpoint("visit_info/get")
+    def origin_visit_info_get(self, url: str, visit_type: str):
+        """Retrieve the info for an origin with a given visit type.
+
+        Returns None when no info is recorded for this (url, visit_type) pair.
+        """
+        ...
diff --git a/swh/scheduler/model.py b/swh/scheduler/model.py
--- a/swh/scheduler/model.py
+++ b/swh/scheduler/model.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020 The Software Heritage developers
+# Copyright (C) 2020-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -191,3 +191,43 @@
         converter=convert_listed_origin_page_token,
         default=None,
     )
+
+
+def _check_timezone_aware(value) -> None:
+    """Reject naive datetimes; None is accepted."""
+    if value is not None and value.tzinfo is None:
+        raise ValueError("date must be a timezone-aware datetime.")
+
+
+@attr.s(frozen=True, slots=True)
+class OriginVisitInfo(BaseSchedulerModel):
+    """Aggregated view of the visits of an origin, for a given visit type.
+
+    The eventful, uneventful and failed attributes each hold the date of the
+    last event of that kind, or None when no date is recorded.
+    """
+
+    url = attr.ib(
+        type=str, validator=[type_validator()], metadata={"primary_key": True}
+    )
+    visit_type = attr.ib(
+        type=str, validator=[type_validator()], metadata={"primary_key": True}
+    )
+    eventful = attr.ib(type=Optional[datetime.datetime], validator=type_validator())
+    uneventful = attr.ib(type=Optional[datetime.datetime], validator=type_validator())
+    failed = attr.ib(type=Optional[datetime.datetime], validator=type_validator())
+
+    @eventful.validator
+    def check_eventful(self, attribute, value):
+        """Checks the eventful date has a timezone."""
+        _check_timezone_aware(value)
+
+    @uneventful.validator
+    def check_uneventful(self, attribute, value):
+        """Checks the uneventful date has a timezone."""
+        _check_timezone_aware(value)
+
+    @failed.validator
+    def check_failed(self, attribute, value):
+        """Checks the failed date has a timezone."""
+        _check_timezone_aware(value)
diff --git a/swh/scheduler/sql/30-schema.sql b/swh/scheduler/sql/30-schema.sql
--- a/swh/scheduler/sql/30-schema.sql
+++ b/swh/scheduler/sql/30-schema.sql
@@ -11,7 +11,7 @@
 comment on column dbversion.description is 'Version description';
 
 insert into dbversion (version, release, description)
-       values (17, now(), 'Work In Progress');
+       values (19, now(), 'Work In Progress');
 
 create table task_type (
   type text primary key,
@@ -159,3 +159,19 @@
 comment on column listed_origins.last_seen is 'Time at which the origin was last seen by the lister';
 
 comment on column listed_origins.last_update is 'Time of the last update to the origin recorded by the remote';
+
+create table origin_visit_info (
+  url text not null,
+  visit_type text not null,
+  eventful timestamptz,
+  uneventful timestamptz,
+  failed timestamptz,
+
+  primary key (url, visit_type)
+);
+
+comment on column origin_visit_info.url is 'Origin URL';
+comment on column origin_visit_info.visit_type is 'Type of the visit for the given url';
+comment on column origin_visit_info.eventful is 'Date of the last eventful event';
+comment on column origin_visit_info.uneventful is 'Date of the last uneventful event';
+comment on column origin_visit_info.failed is 'Date of the last failed event';
diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py
--- a/swh/scheduler/tests/test_scheduler.py
+++ b/swh/scheduler/tests/test_scheduler.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2019 The Software Heritage developers
+# Copyright (C) 2017-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -16,7 +16,7 @@
 
 from swh.scheduler.exc import StaleData
 from swh.scheduler.interface import SchedulerInterface
-from swh.scheduler.model import ListedOrigin, ListedOriginPageToken
+from swh.scheduler.model import ListedOrigin, ListedOriginPageToken, OriginVisitInfo
 from swh.scheduler.utils import utcnow
 
 from .common import LISTERS, TASK_TYPES, TEMPLATES, tasks_from_template
@@ -729,3 +729,68 @@
     def _create_task_types(self, scheduler):
         for tt in TASK_TYPES.values():
             scheduler.create_task_type(tt)
+
+    def test_origin_visit_info_upsert(self, swh_scheduler) -> None:
+        """Upserting only changes the dates which are set in the visit info."""
+        eventful_date = utcnow()
+        url = "https://github.com/test"
+
+        visit_info = OriginVisitInfo(
+            url=url,
+            visit_type="git",
+            eventful=eventful_date,
+            uneventful=None,
+            failed=None,
+        )
+        # Upserting the same info twice must neither fail nor alter the result
+        swh_scheduler.origin_visit_info_upsert(visit_info)
+        swh_scheduler.origin_visit_info_upsert(visit_info)
+
+        assert swh_scheduler.origin_visit_info_get(url, "git") == visit_info
+        assert swh_scheduler.origin_visit_info_get(url, "svn") is None
+
+        # Recording an uneventful date must keep the eventful one
+        uneventful_date = utcnow()
+        visit_info = OriginVisitInfo(
+            url=url,
+            visit_type="git",
+            eventful=None,
+            uneventful=uneventful_date,
+            failed=None,
+        )
+        swh_scheduler.origin_visit_info_upsert(visit_info)
+
+        uneventful_visit = swh_scheduler.origin_visit_info_get(url, "git")
+
+        expected_visit_info = OriginVisitInfo(
+            url=url,
+            visit_type="git",
+            eventful=eventful_date,
+            uneventful=uneventful_date,
+            failed=None,
+        )
+
+        assert uneventful_visit == expected_visit_info
+
+        # Recording a failed date must keep both other dates
+        failed_date = utcnow()
+        visit_info = OriginVisitInfo(
+            url=url,
+            visit_type="git",
+            eventful=None,
+            uneventful=None,
+            failed=failed_date,
+        )
+        swh_scheduler.origin_visit_info_upsert(visit_info)
+
+        failed_visit = swh_scheduler.origin_visit_info_get(url, "git")
+
+        expected_visit_info = OriginVisitInfo(
+            url=url,
+            visit_type="git",
+            eventful=eventful_date,
+            uneventful=uneventful_date,
+            failed=failed_date,
+        )
+
+        assert failed_visit == expected_visit_info