Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9346072
D4888.id17387.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
15 KB
Subscribers
None
D4888.id17387.diff
View Options
diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py
--- a/swh/scheduler/backend.py
+++ b/swh/scheduler/backend.py
@@ -837,16 +837,26 @@
@db_transaction()
def origin_visit_stats_get(
- self, url: str, visit_type: str, db=None, cur=None
- ) -> Optional[OriginVisitStats]:
+ self, ids: Iterable[Tuple[str, str]], db=None, cur=None
+ ) -> Dict[Tuple[str, str], OriginVisitStats]:
+ if not ids:
+ return {}
+
+ primary_keys = tuple((origin, visit_type) for (origin, visit_type) in ids)
query = format_query(
- "SELECT {keys} FROM origin_visit_stats WHERE url=%s AND visit_type=%s",
+ """
+ SELECT {keys}
+ FROM (VALUES %s) as stats(url, visit_type)
+ INNER JOIN origin_visit_stats USING (url, visit_type)
+ """,
OriginVisitStats.select_columns(),
)
- cur.execute(query, (url, visit_type))
- row = cur.fetchone()
- if row:
- return OriginVisitStats(**row)
- else:
- return None
+ psycopg2.extras.execute_values(
+ cur=cur, sql=query, argslist=primary_keys, fetch=False,
+ )
+
+ return {
+ (row["url"], row["visit_type"]): OriginVisitStats(**row)
+ for row in cur.fetchall()
+ }
diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py
--- a/swh/scheduler/interface.py
+++ b/swh/scheduler/interface.py
@@ -3,7 +3,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from typing import Any, Dict, Iterable, List, Optional
+from typing import Any, Dict, Iterable, List, Optional, Tuple
from uuid import UUID
from typing_extensions import Protocol, runtime_checkable
@@ -336,7 +336,7 @@
@remote_api_endpoint("visit_stats/get")
def origin_visit_stats_get(
- self, url: str, visit_type: str
- ) -> Optional[OriginVisitStats]:
+ self, ids: Iterable[Tuple[str, str]]
+ ) -> Dict[Tuple[str, str], OriginVisitStats]:
"""Retrieve the stats for an origin with a given visit type"""
...
diff --git a/swh/scheduler/journal_client.py b/swh/scheduler/journal_client.py
--- a/swh/scheduler/journal_client.py
+++ b/swh/scheduler/journal_client.py
@@ -60,7 +60,7 @@
}
pk = origin, visit_type
if pk not in origin_visit_stats:
- visit_stats = scheduler.origin_visit_stats_get(origin, visit_type)
+ visit_stats = scheduler.origin_visit_stats_get([pk]).get(pk)
origin_visit_stats[pk] = (
attr.asdict(visit_stats) if visit_stats else empty_object
)
diff --git a/swh/scheduler/tests/test_cli_journal.py b/swh/scheduler/tests/test_cli_journal.py
--- a/swh/scheduler/tests/test_cli_journal.py
+++ b/swh/scheduler/tests/test_cli_journal.py
@@ -108,7 +108,8 @@
assert result.output == expected_output
actual_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
+ [(visit_status["origin"], visit_status["type"])]
)
- assert actual_visit_stats is not None
+ assert actual_visit_stats
+ assert len(actual_visit_stats) == 1
diff --git a/swh/scheduler/tests/test_journal_client.py b/swh/scheduler/tests/test_journal_client.py
--- a/swh/scheduler/tests/test_journal_client.py
+++ b/swh/scheduler/tests/test_journal_client.py
@@ -81,11 +81,10 @@
)
# Ensure those visit status are ignored
- for visit_status in visit_statuses:
- actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats is None
+ actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
+ [(vs["origin"], vs["type"]) for vs in visit_statuses]
+ )
+ assert actual_origin_visit_stats == {}
def test_journal_client_origin_visit_status_from_journal_last_notfound(swh_scheduler):
@@ -102,10 +101,9 @@
{"origin_visit_status": [visit_status]}, scheduler=swh_scheduler
)
- actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats == OriginVisitStats(
+ pk = (visit_status["origin"], visit_status["type"])
+ actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get([pk])
+ assert actual_origin_visit_stats[pk] == OriginVisitStats(
url=visit_status["origin"],
visit_type=visit_status["type"],
last_eventful=None,
@@ -139,11 +137,9 @@
)
for visit_status in visit_statuses:
- actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats is not None
- assert actual_origin_visit_stats == OriginVisitStats(
+ pk = (visit_status["origin"], visit_status["type"])
+ actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get([pk])
+ assert actual_origin_visit_stats[pk] == OriginVisitStats(
url=visit_status["origin"],
visit_type=visit_status["type"],
last_eventful=None,
@@ -180,11 +176,9 @@
# Ensure those visit status are ignored
for visit_status in visit_statuses:
- actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats is not None
- assert actual_origin_visit_stats == OriginVisitStats(
+ pk = (visit_status["origin"], visit_status["type"])
+ actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get([pk])
+ assert actual_origin_visit_stats[pk] == OriginVisitStats(
url=visit_status["origin"],
visit_type=visit_status["type"],
last_eventful=None,
@@ -218,11 +212,9 @@
)
for visit_status in visit_statuses:
- actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats is not None
- assert actual_origin_visit_stats == OriginVisitStats(
+ pk = (visit_status["origin"], visit_status["type"])
+ actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get([pk])
+ assert actual_origin_visit_stats[pk] == OriginVisitStats(
url=visit_status["origin"],
visit_type=visit_status["type"],
last_eventful=None,
@@ -258,11 +250,9 @@
)
for visit_status in visit_statuses:
- actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats is not None
- assert actual_origin_visit_stats == OriginVisitStats(
+ pk = (visit_status["origin"], visit_status["type"])
+ actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get([pk])
+ assert actual_origin_visit_stats[pk] == OriginVisitStats(
url=visit_status["origin"],
visit_type=visit_status["type"],
last_eventful=visit_status["date"],
@@ -296,11 +286,9 @@
)
for visit_status in visit_statuses:
- actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats is not None
- assert actual_origin_visit_stats == OriginVisitStats(
+ pk = (visit_status["origin"], visit_status["type"])
+ actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get([pk])
+ assert actual_origin_visit_stats[pk] == OriginVisitStats(
url=visit_status["origin"],
visit_type=visit_status["type"],
last_eventful=DATE3,
@@ -340,11 +328,9 @@
{"origin_visit_status": [visit_status]}, scheduler=swh_scheduler
)
- actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get(
- visit_status["origin"], visit_status["type"]
- )
- assert actual_origin_visit_stats is not None
- assert actual_origin_visit_stats == OriginVisitStats(
+ pk = (visit_status["origin"], visit_status["type"])
+ actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get([pk])
+ assert actual_origin_visit_stats[pk] == OriginVisitStats(
url=visit_status["origin"],
visit_type=visit_status["type"],
last_eventful=DATE1,
@@ -413,7 +399,8 @@
last_snapshot=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
)
- assert swh_scheduler.origin_visit_stats_get("foo", "git") == expected_visit_stats
+ pk = ("foo", "git")
+ assert swh_scheduler.origin_visit_stats_get([pk]) == {pk: expected_visit_stats}
VISIT_STATUSES1 = [
@@ -474,9 +461,8 @@
last_snapshot=hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"),
)
- assert (
- swh_scheduler.origin_visit_stats_get("cavabarder", "hg") == expected_visit_stats
- )
+ pk = ("cavabarder", "hg")
+ assert swh_scheduler.origin_visit_stats_get([pk]) == {pk: expected_visit_stats}
VISIT_STATUSES_2 = [
@@ -563,21 +549,26 @@
)
after = utcnow()
- assert swh_scheduler.origin_visit_stats_get("cavabarder", "hg") is None
- assert swh_scheduler.origin_visit_stats_get("cavabarder", "git") is not None
+ assert swh_scheduler.origin_visit_stats_get([("cavabarder", "hg")]) == {}
+
+ pk = ("cavabarder", "git")
+ assert swh_scheduler.origin_visit_stats_get([pk])[pk] is not None
process_journal_objects(
{"origin_visit_status": VISIT_STATUSES_2}, scheduler=swh_scheduler
)
for url in ("cavabarder", "iciaussi"):
- ovs = swh_scheduler.origin_visit_stats_get(url, "git")
+ pk = (url, "git")
+ ovs = swh_scheduler.origin_visit_stats_get([pk])[pk]
assert before <= ovs.last_scheduled <= after
- ovs = swh_scheduler.origin_visit_stats_get(url, "hg")
+ pk = (url, "hg")
+ ovs = swh_scheduler.origin_visit_stats_get([pk])[pk]
assert ovs.last_scheduled is None
- ovs = swh_scheduler.origin_visit_stats_get("cavabarder", "git")
+ pk = ("cavabarder", "git")
+ ovs = swh_scheduler.origin_visit_stats_get([pk])[pk]
assert ovs.last_eventful == DATE1 + 5 * ONE_DAY
assert ovs.last_uneventful is None
assert ovs.last_failed is None
diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py
--- a/swh/scheduler/tests/test_scheduler.py
+++ b/swh/scheduler/tests/test_scheduler.py
@@ -744,9 +744,8 @@
assert len(ret) == NUM_RESULTS
for origin in ret:
- visit_stats = swh_scheduler.origin_visit_stats_get(
- origin.url, origin.visit_type
- )
+ pk = (origin.url, origin.visit_type)
+ visit_stats = swh_scheduler.origin_visit_stats_get([pk])[pk]
assert visit_stats is not None
assert before <= visit_stats.last_scheduled <= after
@@ -792,8 +791,10 @@
swh_scheduler.origin_visit_stats_upsert([visit_stats])
swh_scheduler.origin_visit_stats_upsert([visit_stats])
- assert swh_scheduler.origin_visit_stats_get(url, "git") == visit_stats
- assert swh_scheduler.origin_visit_stats_get(url, "svn") is None
+ assert swh_scheduler.origin_visit_stats_get([(url, "git")]) == {
+ (url, "git"): visit_stats
+ }
+ assert swh_scheduler.origin_visit_stats_get([(url, "svn")]) == {}
uneventful_date = utcnow()
visit_stats = OriginVisitStats(
@@ -806,7 +807,7 @@
)
swh_scheduler.origin_visit_stats_upsert([visit_stats])
- uneventful_visit = swh_scheduler.origin_visit_stats_get(url, "git")
+ uneventful_visits = swh_scheduler.origin_visit_stats_get([(url, "git")])
expected_visit_stats = OriginVisitStats(
url=url,
@@ -817,7 +818,7 @@
last_notfound=None,
)
- assert uneventful_visit == expected_visit_stats
+ assert uneventful_visits == {(url, "git"): expected_visit_stats}
failed_date = utcnow()
visit_stats = OriginVisitStats(
@@ -830,7 +831,7 @@
)
swh_scheduler.origin_visit_stats_upsert([visit_stats])
- failed_visit = swh_scheduler.origin_visit_stats_get(url, "git")
+ failed_visits = swh_scheduler.origin_visit_stats_get([(url, "git")])
expected_visit_stats = OriginVisitStats(
url=url,
@@ -841,7 +842,7 @@
last_notfound=None,
)
- assert failed_visit == expected_visit_stats
+ assert failed_visits == {(url, "git"): expected_visit_stats}
def test_origin_visit_stats_upsert_with_snapshot(self, swh_scheduler) -> None:
eventful_date = utcnow()
@@ -858,8 +859,10 @@
)
swh_scheduler.origin_visit_stats_upsert([visit_stats])
- assert swh_scheduler.origin_visit_stats_get(url, "git") == visit_stats
- assert swh_scheduler.origin_visit_stats_get(url, "svn") is None
+ assert swh_scheduler.origin_visit_stats_get([(url, "git")]) == {
+ (url, "git"): visit_stats
+ }
+ assert swh_scheduler.origin_visit_stats_get([(url, "svn")]) == {}
def test_origin_visit_stats_upsert_messing_with_time(self, swh_scheduler) -> None:
url = "interesting-origin"
@@ -883,7 +886,8 @@
)
swh_scheduler.origin_visit_stats_upsert([visit_stats0])
- actual_visit_stats0 = swh_scheduler.origin_visit_stats_get(url, "git")
+ pk = (url, "git")
+ actual_visit_stats0 = swh_scheduler.origin_visit_stats_get([pk])[pk]
assert actual_visit_stats0 == visit_stats0
visit_stats2 = OriginVisitStats(
@@ -896,7 +900,7 @@
)
swh_scheduler.origin_visit_stats_upsert([visit_stats2])
- actual_visit_stats2 = swh_scheduler.origin_visit_stats_get(url, "git")
+ actual_visit_stats2 = swh_scheduler.origin_visit_stats_get([pk])[pk]
assert actual_visit_stats2 == attr.evolve(
actual_visit_stats0, last_uneventful=date1
)
@@ -915,7 +919,7 @@
)
swh_scheduler.origin_visit_stats_upsert([visit_stats1])
- actual_visit_stats1 = swh_scheduler.origin_visit_stats_get(url, "git")
+ actual_visit_stats1 = swh_scheduler.origin_visit_stats_get([pk])[pk]
assert actual_visit_stats1 == attr.evolve(
actual_visit_stats2, last_eventful=date2
@@ -946,13 +950,13 @@
swh_scheduler.origin_visit_stats_upsert(visit_stats)
+ visit_stats_dict = swh_scheduler.origin_visit_stats_get(
+ [(vs.url, vs.visit_type) for vs in visit_stats]
+ )
for visit_stat in visit_stats:
- assert (
- swh_scheduler.origin_visit_stats_get(
- visit_stat.url, visit_stat.visit_type
- )
- is not None
- )
+ pk = (visit_stat.url, visit_stat.visit_type)
+ assert pk in visit_stats_dict
+ assert visit_stats_dict[pk] is not None
def test_origin_visit_stats_upsert_cardinality_failing(self, swh_scheduler) -> None:
"""Batch upsert does not support altering multiple times the same origin-visit-status
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 3:42 PM (2 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3230924
Attached To
D4888: scheduler: Make origin_visit_stats_get read multiple entries
Event Timeline
Log In to Comment