diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py --- a/swh/scheduler/backend.py +++ b/swh/scheduler/backend.py @@ -820,16 +820,21 @@ @db_transaction() def origin_visit_stats_get( - self, url: str, visit_type: str, db=None, cur=None - ) -> Optional[OriginVisitStats]: - query = format_query( - "SELECT {keys} FROM origin_visit_stats WHERE url=%s AND visit_type=%s", - OriginVisitStats.select_columns(), - ) - cur.execute(query, (url, visit_type)) - row = cur.fetchone() + self, ids: Iterable[Tuple[str, str]], db=None, cur=None + ) -> List[OriginVisitStats]: + if not ids: + return [] - if row: - return OriginVisitStats(**row) - else: - return None + pks = tuple((origin, visit_type) for (origin, visit_type) in ids) + query = format_query("SELECT {keys} ", OriginVisitStats.select_columns(),) + placeholder_query = ("%s," * len(pks))[:-1] + query += f"FROM (VALUES {placeholder_query}) as stats(url, visit_type) " + query += "INNER JOIN origin_visit_stats USING (url, visit_type)" + + cur.execute(query, pks) + + stats = [] + for row in cur.fetchall(): + stats.append(OriginVisitStats(**row)) + + return stats diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py --- a/swh/scheduler/interface.py +++ b/swh/scheduler/interface.py @@ -3,7 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import Any, Dict, Iterable, List, Optional +from typing import Any, Dict, Iterable, List, Optional, Tuple from uuid import UUID from typing_extensions import Protocol, runtime_checkable @@ -336,7 +336,7 @@ @remote_api_endpoint("visit_stats/get") def origin_visit_stats_get( - self, url: str, visit_type: str - ) -> Optional[OriginVisitStats]: + self, ids: Iterable[Tuple[str, str]] + ) -> List[OriginVisitStats]: """Retrieve the stats for an origin with a given visit type""" ... diff --git a/swh/scheduler/journal_client.py b/swh/scheduler/journal_client.py --- a/swh/scheduler/journal_client.py +++ b/swh/scheduler/journal_client.py @@ -60,9 +60,9 @@ } pk = origin, visit_type if pk not in origin_visit_stats: - visit_stats = scheduler.origin_visit_stats_get(origin, visit_type) + visit_stats = scheduler.origin_visit_stats_get([pk]) origin_visit_stats[pk] = ( - attr.asdict(visit_stats) if visit_stats else empty_object + attr.asdict(visit_stats[0]) if visit_stats else empty_object ) visit_stats_d = origin_visit_stats[pk] diff --git a/swh/scheduler/tests/test_cli_journal.py b/swh/scheduler/tests/test_cli_journal.py --- a/swh/scheduler/tests/test_cli_journal.py +++ b/swh/scheduler/tests/test_cli_journal.py @@ -108,7 +108,8 @@ assert result.output == expected_output actual_visit_stats = swh_scheduler.origin_visit_stats_get( - visit_status["origin"], visit_status["type"] + [(visit_status["origin"], visit_status["type"])] ) - assert actual_visit_stats is not None + assert actual_visit_stats + assert len(actual_visit_stats) == 1 diff --git a/swh/scheduler/tests/test_journal_client.py b/swh/scheduler/tests/test_journal_client.py --- a/swh/scheduler/tests/test_journal_client.py +++ b/swh/scheduler/tests/test_journal_client.py @@ -81,11 +81,10 @@ ) # Ensure those visit status are ignored - for visit_status in visit_statuses: - actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get( - visit_status["origin"], visit_status["type"] - ) - assert actual_origin_visit_stats is None + actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get( + [(vs["origin"], vs["type"]) for vs in visit_statuses] + ) + assert actual_origin_visit_stats == [] def test_journal_client_origin_visit_status_from_journal_last_notfound(swh_scheduler): @@ -103,17 +102,19 @@ ) actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get( - visit_status["origin"], visit_status["type"] - ) - assert actual_origin_visit_stats == OriginVisitStats( - url=visit_status["origin"], - visit_type=visit_status["type"], - last_eventful=None, - last_uneventful=None, - last_failed=None, - last_notfound=visit_status["date"], - last_snapshot=None, + [(visit_status["origin"], visit_status["type"])] ) + assert actual_origin_visit_stats == [ + OriginVisitStats( + url=visit_status["origin"], + visit_type=visit_status["type"], + last_eventful=None, + last_uneventful=None, + last_failed=None, + last_notfound=visit_status["date"], + last_snapshot=None, + ) + ] visit_statuses = [ { @@ -140,18 +141,19 @@ for visit_status in visit_statuses: actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get( - visit_status["origin"], visit_status["type"] - ) - assert actual_origin_visit_stats is not None - assert actual_origin_visit_stats == OriginVisitStats( - url=visit_status["origin"], - visit_type=visit_status["type"], - last_eventful=None, - last_uneventful=None, - last_failed=None, - last_notfound=DATE3, - last_snapshot=None, + [(visit_status["origin"], visit_status["type"])] ) + assert actual_origin_visit_stats == [ + OriginVisitStats( + url=visit_status["origin"], + visit_type=visit_status["type"], + last_eventful=None, + last_uneventful=None, + last_failed=None, + last_notfound=DATE3, + last_snapshot=None, + ) + ] def test_journal_client_origin_visit_status_from_journal_last_failed(swh_scheduler): @@ -181,18 +183,19 @@ # Ensure those visit status are ignored for visit_status in visit_statuses: actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get( - visit_status["origin"], visit_status["type"] - ) - assert actual_origin_visit_stats is not None - assert actual_origin_visit_stats == OriginVisitStats( - url=visit_status["origin"], - visit_type=visit_status["type"], - last_eventful=None, - last_uneventful=None, - last_failed=visit_status["date"], - last_notfound=None, - last_snapshot=None, + [(visit_status["origin"], visit_status["type"])] ) + assert actual_origin_visit_stats == [ + OriginVisitStats( + url=visit_status["origin"], + visit_type=visit_status["type"], + last_eventful=None, + last_uneventful=None, + last_failed=visit_status["date"], + last_notfound=None, + last_snapshot=None, + ) + ] visit_statuses = [ { @@ -219,18 +222,19 @@ for visit_status in visit_statuses: actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get( - visit_status["origin"], visit_status["type"] - ) - assert actual_origin_visit_stats is not None - assert actual_origin_visit_stats == OriginVisitStats( - url=visit_status["origin"], - visit_type=visit_status["type"], - last_eventful=None, - last_uneventful=None, - last_failed=DATE3, - last_notfound=None, - last_snapshot=None, + [(visit_status["origin"], visit_status["type"])] ) + assert actual_origin_visit_stats == [ + OriginVisitStats( + url=visit_status["origin"], + visit_type=visit_status["type"], + last_eventful=None, + last_uneventful=None, + last_failed=DATE3, + last_notfound=None, + last_snapshot=None, + ) + ] def test_journal_client_origin_visit_status_from_journal_last_eventful(swh_scheduler): @@ -259,18 +263,19 @@ for visit_status in visit_statuses: actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get( - visit_status["origin"], visit_status["type"] - ) - assert actual_origin_visit_stats is not None - assert actual_origin_visit_stats == OriginVisitStats( - url=visit_status["origin"], - visit_type=visit_status["type"], - last_eventful=visit_status["date"], - last_uneventful=None, - last_failed=None, - last_notfound=None, - last_snapshot=visit_status["snapshot"], + [(visit_status["origin"], visit_status["type"])] ) + assert actual_origin_visit_stats == [ + OriginVisitStats( + url=visit_status["origin"], + visit_type=visit_status["type"], + last_eventful=visit_status["date"], + last_uneventful=None, + last_failed=None, + last_notfound=None, + last_snapshot=visit_status["snapshot"], + ) + ] visit_statuses = [ { @@ -297,18 +302,19 @@ for visit_status in visit_statuses: actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get( - visit_status["origin"], visit_status["type"] - ) - assert actual_origin_visit_stats is not None - assert actual_origin_visit_stats == OriginVisitStats( - url=visit_status["origin"], - visit_type=visit_status["type"], - last_eventful=DATE3, - last_uneventful=None, - last_failed=None, - last_notfound=None, - last_snapshot=hash_to_bytes("dddcc0710eb6cf9efd5b920a8453e1e07157bddd"), + [(visit_status["origin"], visit_status["type"])] ) + assert actual_origin_visit_stats == [ + OriginVisitStats( + url=visit_status["origin"], + visit_type=visit_status["type"], + last_eventful=DATE3, + last_uneventful=None, + last_failed=None, + last_notfound=None, + last_snapshot=hash_to_bytes("dddcc0710eb6cf9efd5b920a8453e1e07157bddd"), + ) + ] def test_journal_client_origin_visit_status_from_journal_last_uneventful(swh_scheduler): @@ -341,18 +347,19 @@ ) actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get( - visit_status["origin"], visit_status["type"] - ) - assert actual_origin_visit_stats is not None - assert actual_origin_visit_stats == OriginVisitStats( - url=visit_status["origin"], - visit_type=visit_status["type"], - last_eventful=DATE1, - last_uneventful=visit_status["date"], # most recent date but uneventful - last_failed=DATE2, - last_notfound=DATE1, - last_snapshot=visit_status["snapshot"], + [(visit_status["origin"], visit_status["type"])] ) + assert actual_origin_visit_stats == [ + OriginVisitStats( + url=visit_status["origin"], + visit_type=visit_status["type"], + last_eventful=DATE1, + last_uneventful=visit_status["date"], # most recent date but uneventful + last_failed=DATE2, + last_notfound=DATE1, + last_snapshot=visit_status["snapshot"], + ) + ] VISIT_STATUSES = [ @@ -413,7 +420,9 @@ last_snapshot=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), ) - assert swh_scheduler.origin_visit_stats_get("foo", "git") == expected_visit_stats + assert swh_scheduler.origin_visit_stats_get([("foo", "git")]) == [ + expected_visit_stats + ] VISIT_STATUSES1 = [ @@ -474,6 +483,6 @@ last_snapshot=hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"), ) - assert ( - swh_scheduler.origin_visit_stats_get("cavabarder", "hg") == expected_visit_stats - ) + assert swh_scheduler.origin_visit_stats_get([("cavabarder", "hg")]) == [ + expected_visit_stats + ] diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py --- a/swh/scheduler/tests/test_scheduler.py +++ b/swh/scheduler/tests/test_scheduler.py @@ -788,8 +788,8 @@ swh_scheduler.origin_visit_stats_upsert([visit_stats]) swh_scheduler.origin_visit_stats_upsert([visit_stats]) - assert swh_scheduler.origin_visit_stats_get(url, "git") == visit_stats - assert swh_scheduler.origin_visit_stats_get(url, "svn") is None + assert swh_scheduler.origin_visit_stats_get([(url, "git")]) == [visit_stats] + assert swh_scheduler.origin_visit_stats_get([(url, "svn")]) == [] uneventful_date = utcnow() visit_stats = OriginVisitStats( @@ -802,7 +802,7 @@ ) swh_scheduler.origin_visit_stats_upsert([visit_stats]) - uneventful_visit = swh_scheduler.origin_visit_stats_get(url, "git") + uneventful_visits = swh_scheduler.origin_visit_stats_get([(url, "git")]) expected_visit_stats = OriginVisitStats( url=url, @@ -813,7 +813,7 @@ last_notfound=None, ) - assert uneventful_visit == expected_visit_stats + assert uneventful_visits == [expected_visit_stats] failed_date = utcnow() visit_stats = OriginVisitStats( @@ -826,7 +826,7 @@ ) swh_scheduler.origin_visit_stats_upsert([visit_stats]) - failed_visit = swh_scheduler.origin_visit_stats_get(url, "git") + failed_visits = swh_scheduler.origin_visit_stats_get([(url, "git")]) expected_visit_stats = OriginVisitStats( url=url, @@ -837,7 +837,7 @@ last_notfound=None, ) - assert failed_visit == expected_visit_stats + assert failed_visits == [expected_visit_stats] def test_origin_visit_stats_upsert_with_snapshot(self, swh_scheduler) -> None: eventful_date = utcnow() @@ -854,8 +854,8 @@ ) swh_scheduler.origin_visit_stats_upsert([visit_stats]) - assert swh_scheduler.origin_visit_stats_get(url, "git") == visit_stats - assert swh_scheduler.origin_visit_stats_get(url, "svn") is None + assert swh_scheduler.origin_visit_stats_get([(url, "git")]) == [visit_stats] + assert swh_scheduler.origin_visit_stats_get([(url, "svn")]) == [] def test_origin_visit_stats_upsert_messing_with_time(self, swh_scheduler) -> None: url = "interesting-origin" @@ -879,7 +879,7 @@ ) swh_scheduler.origin_visit_stats_upsert([visit_stats0]) - actual_visit_stats0 = swh_scheduler.origin_visit_stats_get(url, "git") + actual_visit_stats0 = swh_scheduler.origin_visit_stats_get([(url, "git")])[0] assert actual_visit_stats0 == visit_stats0 visit_stats2 = OriginVisitStats( @@ -892,7 +892,7 @@ ) swh_scheduler.origin_visit_stats_upsert([visit_stats2]) - actual_visit_stats2 = swh_scheduler.origin_visit_stats_get(url, "git") + actual_visit_stats2 = swh_scheduler.origin_visit_stats_get([(url, "git")])[0] assert actual_visit_stats2 == attr.evolve( actual_visit_stats0, last_uneventful=date1 ) @@ -911,7 +911,7 @@ ) swh_scheduler.origin_visit_stats_upsert([visit_stats1]) - actual_visit_stats1 = swh_scheduler.origin_visit_stats_get(url, "git") + actual_visit_stats1 = swh_scheduler.origin_visit_stats_get([(url, "git")])[0] assert actual_visit_stats1 == attr.evolve( actual_visit_stats2, last_eventful=date2 @@ -942,13 +942,10 @@ swh_scheduler.origin_visit_stats_upsert(visit_stats) - for visit_stat in visit_stats: - assert ( - swh_scheduler.origin_visit_stats_get( - visit_stat.url, visit_stat.visit_type - ) - is not None - ) + for visit_stat in swh_scheduler.origin_visit_stats_get( + [(vs.url, vs.visit_type) for vs in visit_stats] + ): + assert visit_stat is not None def test_origin_visit_stats_upsert_cardinality_failing(self, swh_scheduler) -> None: """Batch upsert does not support altering multiple times the same origin-visit-status