Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9345405
D4888.id17440.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
19 KB
Subscribers
None
D4888.id17440.diff
View Options
diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py
--- a/swh/scheduler/backend.py
+++ b/swh/scheduler/backend.py
@@ -864,19 +864,21 @@
@db_transaction()
def origin_visit_stats_get(
- self, url: str, visit_type: str, db=None, cur=None
- ) -> Optional[OriginVisitStats]:
+ self, ids: Iterable[Tuple[str, str]], db=None, cur=None
+ ) -> List[OriginVisitStats]:
+ if not ids:
+ return []
+ primary_keys = tuple((origin, visit_type) for (origin, visit_type) in ids)
query = format_query(
- "SELECT {keys} FROM origin_visit_stats WHERE url=%s AND visit_type=%s",
+ """
+ SELECT {keys}
+ FROM (VALUES %s) as stats(url, visit_type)
+ INNER JOIN origin_visit_stats USING (url, visit_type)
+ """,
OriginVisitStats.select_columns(),
)
- cur.execute(query, (url, visit_type))
- row = cur.fetchone()
-
- if row:
- return OriginVisitStats(**row)
- else:
- return None
+ psycopg2.extras.execute_values(cur=cur, sql=query, argslist=primary_keys)
+ return [OriginVisitStats(**row) for row in cur.fetchall()]
@db_transaction()
def update_metrics(
diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py
--- a/swh/scheduler/interface.py
+++ b/swh/scheduler/interface.py
@@ -3,8 +3,9 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+
import datetime
-from typing import Any, Dict, Iterable, List, Optional
+from typing import Any, Dict, Iterable, List, Optional, Tuple
from uuid import UUID
from typing_extensions import Protocol, runtime_checkable
@@ -347,9 +348,14 @@
@remote_api_endpoint("visit_stats/get")
def origin_visit_stats_get(
- self, url: str, visit_type: str
- ) -> Optional[OriginVisitStats]:
- """Retrieve the stats for an origin with a given visit type"""
+ self, ids: Iterable[Tuple[str, str]]
+ ) -> List[OriginVisitStats]:
+ """Retrieve the visit stats for each of the given (origin url, visit type) pairs
+
+ Pairs for which no visit stats are found are omitted from the result, so the
+ output list may be shorter than the input list.
+
+ """
...
@remote_api_endpoint("scheduler_metrics/update")
diff --git a/swh/scheduler/journal_client.py b/swh/scheduler/journal_client.py
--- a/swh/scheduler/journal_client.py
+++ b/swh/scheduler/journal_client.py
@@ -65,9 +65,9 @@
}
pk = origin, visit_type
if pk not in origin_visit_stats:
- visit_stats = scheduler.origin_visit_stats_get(origin, visit_type)
+ visit_stats = scheduler.origin_visit_stats_get([pk])
origin_visit_stats[pk] = (
- attr.asdict(visit_stats) if visit_stats else empty_object
+ attr.asdict(visit_stats[0]) if visit_stats else empty_object
)
visit_stats_d = origin_visit_stats[pk]
diff --git a/swh/scheduler/simulator/origins.py b/swh/scheduler/simulator/origins.py
--- a/swh/scheduler/simulator/origins.py
+++ b/swh/scheduler/simulator/origins.py
@@ -94,7 +94,9 @@
"""
# This is cheating; actual tasks access the state from the storage, not the
# scheduler
- stats = env.scheduler.origin_visit_stats_get(task.origin, task.visit_type)
+ pk = task.origin, task.visit_type
+ visit_stats = env.scheduler.origin_visit_stats_get([pk])
+ stats: Optional[OriginVisitStats] = visit_stats[0] if len(visit_stats) > 0 else None
last_snapshot = stats.last_snapshot if stats else None
status = OriginVisitStatus(
diff --git a/swh/scheduler/tests/test_cli_journal.py b/swh/scheduler/tests/test_cli_journal.py
--- a/swh/scheduler/tests/test_cli_journal.py
+++ b/swh/scheduler/tests/test_cli_journal.py
@@ -109,7 +109,8 @@
assert result.output == expected_output
actual_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
+ [(visit_status["origin"], visit_status["type"])]
)
- assert actual_visit_stats is not None
+ assert actual_visit_stats
+ assert len(actual_visit_stats) == 1
diff --git a/swh/scheduler/tests/test_journal_client.py b/swh/scheduler/tests/test_journal_client.py
--- a/swh/scheduler/tests/test_journal_client.py
+++ b/swh/scheduler/tests/test_journal_client.py
@@ -90,11 +90,10 @@
)
# Ensure those visit status are ignored
- for visit_status in visit_statuses:
- actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats is None
+ actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
+ [(vs["origin"], vs["type"]) for vs in visit_statuses]
+ )
+ assert actual_origin_visit_stats == []
def test_journal_client_origin_visit_status_from_journal_last_notfound(swh_scheduler):
@@ -112,17 +111,19 @@
)
actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats == OriginVisitStats(
- url=visit_status["origin"],
- visit_type=visit_status["type"],
- last_eventful=None,
- last_uneventful=None,
- last_failed=None,
- last_notfound=visit_status["date"],
- last_snapshot=None,
+ [(visit_status["origin"], visit_status["type"])]
)
+ assert actual_origin_visit_stats == [
+ OriginVisitStats(
+ url=visit_status["origin"],
+ visit_type=visit_status["type"],
+ last_eventful=None,
+ last_uneventful=None,
+ last_failed=None,
+ last_notfound=visit_status["date"],
+ last_snapshot=None,
+ )
+ ]
visit_statuses = [
{
@@ -149,18 +150,19 @@
for visit_status in visit_statuses:
actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats is not None
- assert actual_origin_visit_stats == OriginVisitStats(
- url=visit_status["origin"],
- visit_type=visit_status["type"],
- last_eventful=None,
- last_uneventful=None,
- last_failed=None,
- last_notfound=DATE3,
- last_snapshot=None,
+ [(visit_status["origin"], visit_status["type"])]
)
+ assert actual_origin_visit_stats == [
+ OriginVisitStats(
+ url=visit_status["origin"],
+ visit_type=visit_status["type"],
+ last_eventful=None,
+ last_uneventful=None,
+ last_failed=None,
+ last_notfound=DATE3,
+ last_snapshot=None,
+ )
+ ]
def test_journal_client_origin_visit_status_from_journal_last_failed(swh_scheduler):
@@ -190,18 +192,19 @@
# Ensure those visit status are ignored
for visit_status in visit_statuses:
actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats is not None
- assert actual_origin_visit_stats == OriginVisitStats(
- url=visit_status["origin"],
- visit_type=visit_status["type"],
- last_eventful=None,
- last_uneventful=None,
- last_failed=visit_status["date"],
- last_notfound=None,
- last_snapshot=None,
+ [(visit_status["origin"], visit_status["type"])]
)
+ assert actual_origin_visit_stats == [
+ OriginVisitStats(
+ url=visit_status["origin"],
+ visit_type=visit_status["type"],
+ last_eventful=None,
+ last_uneventful=None,
+ last_failed=visit_status["date"],
+ last_notfound=None,
+ last_snapshot=None,
+ )
+ ]
visit_statuses = [
{
@@ -228,18 +231,19 @@
for visit_status in visit_statuses:
actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats is not None
- assert actual_origin_visit_stats == OriginVisitStats(
- url=visit_status["origin"],
- visit_type=visit_status["type"],
- last_eventful=None,
- last_uneventful=None,
- last_failed=DATE3,
- last_notfound=None,
- last_snapshot=None,
+ [(visit_status["origin"], visit_status["type"])]
)
+ assert actual_origin_visit_stats == [
+ OriginVisitStats(
+ url=visit_status["origin"],
+ visit_type=visit_status["type"],
+ last_eventful=None,
+ last_uneventful=None,
+ last_failed=DATE3,
+ last_notfound=None,
+ last_snapshot=None,
+ )
+ ]
def test_journal_client_origin_visit_status_from_journal_last_eventful(swh_scheduler):
@@ -268,18 +272,19 @@
for visit_status in visit_statuses:
actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats is not None
- assert actual_origin_visit_stats == OriginVisitStats(
- url=visit_status["origin"],
- visit_type=visit_status["type"],
- last_eventful=visit_status["date"],
- last_uneventful=None,
- last_failed=None,
- last_notfound=None,
- last_snapshot=visit_status["snapshot"],
+ [(visit_status["origin"], visit_status["type"])]
)
+ assert actual_origin_visit_stats == [
+ OriginVisitStats(
+ url=visit_status["origin"],
+ visit_type=visit_status["type"],
+ last_eventful=visit_status["date"],
+ last_uneventful=None,
+ last_failed=None,
+ last_notfound=None,
+ last_snapshot=visit_status["snapshot"],
+ )
+ ]
visit_statuses = [
{
@@ -306,18 +311,19 @@
for visit_status in visit_statuses:
actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats is not None
- assert actual_origin_visit_stats == OriginVisitStats(
- url=visit_status["origin"],
- visit_type=visit_status["type"],
- last_eventful=DATE3,
- last_uneventful=None,
- last_failed=None,
- last_notfound=None,
- last_snapshot=hash_to_bytes("dddcc0710eb6cf9efd5b920a8453e1e07157bddd"),
+ [(visit_status["origin"], visit_status["type"])]
)
+ assert actual_origin_visit_stats == [
+ OriginVisitStats(
+ url=visit_status["origin"],
+ visit_type=visit_status["type"],
+ last_eventful=DATE3,
+ last_uneventful=None,
+ last_failed=None,
+ last_notfound=None,
+ last_snapshot=hash_to_bytes("dddcc0710eb6cf9efd5b920a8453e1e07157bddd"),
+ )
+ ]
def test_journal_client_origin_visit_status_from_journal_last_uneventful(swh_scheduler):
@@ -350,18 +356,19 @@
)
actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats is not None
- assert actual_origin_visit_stats == OriginVisitStats(
- url=visit_status["origin"],
- visit_type=visit_status["type"],
- last_eventful=DATE1,
- last_uneventful=visit_status["date"], # most recent date but uneventful
- last_failed=DATE2,
- last_notfound=DATE1,
- last_snapshot=visit_status["snapshot"],
+ [(visit_status["origin"], visit_status["type"])]
)
+ assert actual_origin_visit_stats == [
+ OriginVisitStats(
+ url=visit_status["origin"],
+ visit_type=visit_status["type"],
+ last_eventful=DATE1,
+ last_uneventful=visit_status["date"], # most recent date but uneventful
+ last_failed=DATE2,
+ last_notfound=DATE1,
+ last_snapshot=visit_status["snapshot"],
+ )
+ ]
VISIT_STATUSES = [
@@ -422,7 +429,9 @@
last_snapshot=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
)
- assert swh_scheduler.origin_visit_stats_get("foo", "git") == expected_visit_stats
+ assert swh_scheduler.origin_visit_stats_get([("foo", "git")]) == [
+ expected_visit_stats
+ ]
VISIT_STATUSES1 = [
@@ -483,9 +492,9 @@
last_snapshot=hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"),
)
- assert (
- swh_scheduler.origin_visit_stats_get("cavabarder", "hg") == expected_visit_stats
- )
+ assert swh_scheduler.origin_visit_stats_get([("cavabarder", "hg")]) == [
+ expected_visit_stats
+ ]
VISIT_STATUSES_2 = [
@@ -572,21 +581,21 @@
)
after = utcnow()
- assert swh_scheduler.origin_visit_stats_get("cavabarder", "hg") is None
- assert swh_scheduler.origin_visit_stats_get("cavabarder", "git") is not None
+ assert swh_scheduler.origin_visit_stats_get([("cavabarder", "hg")]) == []
+ assert swh_scheduler.origin_visit_stats_get([("cavabarder", "git")])[0] is not None
process_journal_objects(
{"origin_visit_status": VISIT_STATUSES_2}, scheduler=swh_scheduler
)
for url in ("cavabarder", "iciaussi"):
- ovs = swh_scheduler.origin_visit_stats_get(url, "git")
+ ovs = swh_scheduler.origin_visit_stats_get([(url, "git")])[0]
assert before <= ovs.last_scheduled <= after
- ovs = swh_scheduler.origin_visit_stats_get(url, "hg")
+ ovs = swh_scheduler.origin_visit_stats_get([(url, "hg")])[0]
assert ovs.last_scheduled is None
- ovs = swh_scheduler.origin_visit_stats_get("cavabarder", "git")
+ ovs = swh_scheduler.origin_visit_stats_get([("cavabarder", "git")])[0]
assert ovs.last_eventful == DATE1 + 5 * ONE_DAY
assert ovs.last_uneventful is None
assert ovs.last_failed is None
diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py
--- a/swh/scheduler/tests/test_scheduler.py
+++ b/swh/scheduler/tests/test_scheduler.py
@@ -772,9 +772,8 @@
assert len(ret) == NUM_RESULTS
for origin in ret:
- visit_stats = swh_scheduler.origin_visit_stats_get(
- origin.url, origin.visit_type
- )
+ pk = (origin.url, origin.visit_type)
+ visit_stats = swh_scheduler.origin_visit_stats_get([pk])[0]
assert visit_stats is not None
assert before <= visit_stats.last_scheduled <= after
@@ -805,6 +804,9 @@
for tt in TASK_TYPES.values():
scheduler.create_task_type(tt)
+ def test_origin_visit_stats_get_empty(self, swh_scheduler) -> None:
+ assert swh_scheduler.origin_visit_stats_get([]) == []
+
def test_origin_visit_stats_upsert(self, swh_scheduler) -> None:
eventful_date = utcnow()
url = "https://github.com/test"
@@ -820,8 +822,8 @@
swh_scheduler.origin_visit_stats_upsert([visit_stats])
swh_scheduler.origin_visit_stats_upsert([visit_stats])
- assert swh_scheduler.origin_visit_stats_get(url, "git") == visit_stats
- assert swh_scheduler.origin_visit_stats_get(url, "svn") is None
+ assert swh_scheduler.origin_visit_stats_get([(url, "git")]) == [visit_stats]
+ assert swh_scheduler.origin_visit_stats_get([(url, "svn")]) == []
uneventful_date = utcnow()
visit_stats = OriginVisitStats(
@@ -834,7 +836,7 @@
)
swh_scheduler.origin_visit_stats_upsert([visit_stats])
- uneventful_visit = swh_scheduler.origin_visit_stats_get(url, "git")
+ uneventful_visits = swh_scheduler.origin_visit_stats_get([(url, "git")])
expected_visit_stats = OriginVisitStats(
url=url,
@@ -845,7 +847,7 @@
last_notfound=None,
)
- assert uneventful_visit == expected_visit_stats
+ assert uneventful_visits == [expected_visit_stats]
failed_date = utcnow()
visit_stats = OriginVisitStats(
@@ -858,7 +860,7 @@
)
swh_scheduler.origin_visit_stats_upsert([visit_stats])
- failed_visit = swh_scheduler.origin_visit_stats_get(url, "git")
+ failed_visits = swh_scheduler.origin_visit_stats_get([(url, "git")])
expected_visit_stats = OriginVisitStats(
url=url,
@@ -869,7 +871,7 @@
last_notfound=None,
)
- assert failed_visit == expected_visit_stats
+ assert failed_visits == [expected_visit_stats]
def test_origin_visit_stats_upsert_with_snapshot(self, swh_scheduler) -> None:
eventful_date = utcnow()
@@ -886,8 +888,8 @@
)
swh_scheduler.origin_visit_stats_upsert([visit_stats])
- assert swh_scheduler.origin_visit_stats_get(url, "git") == visit_stats
- assert swh_scheduler.origin_visit_stats_get(url, "svn") is None
+ assert swh_scheduler.origin_visit_stats_get([(url, "git")]) == [visit_stats]
+ assert swh_scheduler.origin_visit_stats_get([(url, "svn")]) == []
def test_origin_visit_stats_upsert_messing_with_time(self, swh_scheduler) -> None:
url = "interesting-origin"
@@ -911,7 +913,7 @@
)
swh_scheduler.origin_visit_stats_upsert([visit_stats0])
- actual_visit_stats0 = swh_scheduler.origin_visit_stats_get(url, "git")
+ actual_visit_stats0 = swh_scheduler.origin_visit_stats_get([(url, "git")])[0]
assert actual_visit_stats0 == visit_stats0
visit_stats2 = OriginVisitStats(
@@ -924,7 +926,7 @@
)
swh_scheduler.origin_visit_stats_upsert([visit_stats2])
- actual_visit_stats2 = swh_scheduler.origin_visit_stats_get(url, "git")
+ actual_visit_stats2 = swh_scheduler.origin_visit_stats_get([(url, "git")])[0]
assert actual_visit_stats2 == attr.evolve(
actual_visit_stats0, last_uneventful=date1
)
@@ -943,7 +945,7 @@
)
swh_scheduler.origin_visit_stats_upsert([visit_stats1])
- actual_visit_stats1 = swh_scheduler.origin_visit_stats_get(url, "git")
+ actual_visit_stats1 = swh_scheduler.origin_visit_stats_get([(url, "git")])[0]
assert actual_visit_stats1 == attr.evolve(
actual_visit_stats2, last_eventful=date2
@@ -974,13 +976,10 @@
swh_scheduler.origin_visit_stats_upsert(visit_stats)
- for visit_stat in visit_stats:
- assert (
- swh_scheduler.origin_visit_stats_get(
- visit_stat.url, visit_stat.visit_type
- )
- is not None
- )
+ for visit_stat in swh_scheduler.origin_visit_stats_get(
+ [(vs.url, vs.visit_type) for vs in visit_stats]
+ ):
+ assert visit_stat is not None
def test_origin_visit_stats_upsert_cardinality_failing(self, swh_scheduler) -> None:
"""Batch upsert does not support altering multiple times the same origin-visit-status
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 3:20 PM (5 d, 19 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226412
Attached To
D4888: scheduler: Make origin_visit_stats_get read multiple entries
Event Timeline
Log In to Comment