Changeset View
Changeset View
Standalone View
Standalone View
swh/scheduler/tests/test_scheduler.py
Show First 20 Lines • Show All 738 Lines • ▼ Show 20 Lines | def test_grab_next_visits(self, swh_scheduler, listed_origins_by_type, policy): | ||||
swh_scheduler.record_listed_origins(origins) | swh_scheduler.record_listed_origins(origins) | ||||
before = utcnow() | before = utcnow() | ||||
ret = swh_scheduler.grab_next_visits(visit_type, NUM_RESULTS, policy=policy) | ret = swh_scheduler.grab_next_visits(visit_type, NUM_RESULTS, policy=policy) | ||||
after = utcnow() | after = utcnow() | ||||
assert len(ret) == NUM_RESULTS | assert len(ret) == NUM_RESULTS | ||||
for origin in ret: | for origin in ret: | ||||
visit_stats = swh_scheduler.origin_visit_stats_get( | pk = (origin.url, origin.visit_type) | ||||
origin.url, origin.visit_type | visit_stats = swh_scheduler.origin_visit_stats_get([pk])[pk] | ||||
) | |||||
assert visit_stats is not None | assert visit_stats is not None | ||||
assert before <= visit_stats.last_scheduled <= after | assert before <= visit_stats.last_scheduled <= after | ||||
@pytest.mark.parametrize("policy", ["oldest_scheduled_first"]) | @pytest.mark.parametrize("policy", ["oldest_scheduled_first"]) | ||||
def test_grab_next_visits_underflow( | def test_grab_next_visits_underflow( | ||||
self, swh_scheduler, listed_origins_by_type, policy | self, swh_scheduler, listed_origins_by_type, policy | ||||
): | ): | ||||
NUM_RESULTS = 5 | NUM_RESULTS = 5 | ||||
Show All 29 Lines | def test_origin_visit_stats_upsert(self, swh_scheduler) -> None: | ||||
last_eventful=eventful_date, | last_eventful=eventful_date, | ||||
last_uneventful=None, | last_uneventful=None, | ||||
last_failed=None, | last_failed=None, | ||||
last_notfound=None, | last_notfound=None, | ||||
) | ) | ||||
swh_scheduler.origin_visit_stats_upsert([visit_stats]) | swh_scheduler.origin_visit_stats_upsert([visit_stats]) | ||||
swh_scheduler.origin_visit_stats_upsert([visit_stats]) | swh_scheduler.origin_visit_stats_upsert([visit_stats]) | ||||
assert swh_scheduler.origin_visit_stats_get(url, "git") == visit_stats | assert swh_scheduler.origin_visit_stats_get([(url, "git")]) == { | ||||
assert swh_scheduler.origin_visit_stats_get(url, "svn") is None | (url, "git"): visit_stats | ||||
} | |||||
assert swh_scheduler.origin_visit_stats_get([(url, "svn")]) == {} | |||||
uneventful_date = utcnow() | uneventful_date = utcnow() | ||||
visit_stats = OriginVisitStats( | visit_stats = OriginVisitStats( | ||||
url=url, | url=url, | ||||
visit_type="git", | visit_type="git", | ||||
last_eventful=None, | last_eventful=None, | ||||
last_uneventful=uneventful_date, | last_uneventful=uneventful_date, | ||||
last_failed=None, | last_failed=None, | ||||
last_notfound=None, | last_notfound=None, | ||||
) | ) | ||||
swh_scheduler.origin_visit_stats_upsert([visit_stats]) | swh_scheduler.origin_visit_stats_upsert([visit_stats]) | ||||
uneventful_visit = swh_scheduler.origin_visit_stats_get(url, "git") | uneventful_visits = swh_scheduler.origin_visit_stats_get([(url, "git")]) | ||||
expected_visit_stats = OriginVisitStats( | expected_visit_stats = OriginVisitStats( | ||||
url=url, | url=url, | ||||
visit_type="git", | visit_type="git", | ||||
last_eventful=eventful_date, | last_eventful=eventful_date, | ||||
last_uneventful=uneventful_date, | last_uneventful=uneventful_date, | ||||
last_failed=None, | last_failed=None, | ||||
last_notfound=None, | last_notfound=None, | ||||
) | ) | ||||
assert uneventful_visit == expected_visit_stats | assert uneventful_visits == {(url, "git"): expected_visit_stats} | ||||
failed_date = utcnow() | failed_date = utcnow() | ||||
visit_stats = OriginVisitStats( | visit_stats = OriginVisitStats( | ||||
url=url, | url=url, | ||||
visit_type="git", | visit_type="git", | ||||
last_eventful=None, | last_eventful=None, | ||||
last_uneventful=None, | last_uneventful=None, | ||||
last_failed=failed_date, | last_failed=failed_date, | ||||
last_notfound=None, | last_notfound=None, | ||||
) | ) | ||||
swh_scheduler.origin_visit_stats_upsert([visit_stats]) | swh_scheduler.origin_visit_stats_upsert([visit_stats]) | ||||
failed_visit = swh_scheduler.origin_visit_stats_get(url, "git") | failed_visits = swh_scheduler.origin_visit_stats_get([(url, "git")]) | ||||
expected_visit_stats = OriginVisitStats( | expected_visit_stats = OriginVisitStats( | ||||
url=url, | url=url, | ||||
visit_type="git", | visit_type="git", | ||||
last_eventful=eventful_date, | last_eventful=eventful_date, | ||||
last_uneventful=uneventful_date, | last_uneventful=uneventful_date, | ||||
last_failed=failed_date, | last_failed=failed_date, | ||||
last_notfound=None, | last_notfound=None, | ||||
) | ) | ||||
assert failed_visit == expected_visit_stats | assert failed_visits == {(url, "git"): expected_visit_stats} | ||||
def test_origin_visit_stats_upsert_with_snapshot(self, swh_scheduler) -> None: | def test_origin_visit_stats_upsert_with_snapshot(self, swh_scheduler) -> None: | ||||
eventful_date = utcnow() | eventful_date = utcnow() | ||||
url = "https://github.com/666/test" | url = "https://github.com/666/test" | ||||
visit_stats = OriginVisitStats( | visit_stats = OriginVisitStats( | ||||
url=url, | url=url, | ||||
visit_type="git", | visit_type="git", | ||||
last_eventful=eventful_date, | last_eventful=eventful_date, | ||||
last_uneventful=None, | last_uneventful=None, | ||||
last_failed=None, | last_failed=None, | ||||
last_notfound=None, | last_notfound=None, | ||||
last_snapshot=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), | last_snapshot=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), | ||||
) | ) | ||||
swh_scheduler.origin_visit_stats_upsert([visit_stats]) | swh_scheduler.origin_visit_stats_upsert([visit_stats]) | ||||
assert swh_scheduler.origin_visit_stats_get(url, "git") == visit_stats | assert swh_scheduler.origin_visit_stats_get([(url, "git")]) == { | ||||
assert swh_scheduler.origin_visit_stats_get(url, "svn") is None | (url, "git"): visit_stats | ||||
} | |||||
assert swh_scheduler.origin_visit_stats_get([(url, "svn")]) == {} | |||||
def test_origin_visit_stats_upsert_messing_with_time(self, swh_scheduler) -> None: | def test_origin_visit_stats_upsert_messing_with_time(self, swh_scheduler) -> None: | ||||
url = "interesting-origin" | url = "interesting-origin" | ||||
# Let's play with dates... | # Let's play with dates... | ||||
date2 = utcnow() | date2 = utcnow() | ||||
date1 = date2 - ONEDAY | date1 = date2 - ONEDAY | ||||
date0 = date1 - ONEDAY | date0 = date1 - ONEDAY | ||||
assert date0 < date1 < date2 | assert date0 < date1 < date2 | ||||
snapshot2 = hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd") | snapshot2 = hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd") | ||||
snapshot0 = hash_to_bytes("fffcc0710eb6cf9efd5b920a8453e1e07157bfff") | snapshot0 = hash_to_bytes("fffcc0710eb6cf9efd5b920a8453e1e07157bfff") | ||||
visit_stats0 = OriginVisitStats( | visit_stats0 = OriginVisitStats( | ||||
url=url, | url=url, | ||||
visit_type="git", | visit_type="git", | ||||
last_eventful=date2, | last_eventful=date2, | ||||
last_uneventful=None, | last_uneventful=None, | ||||
last_failed=None, | last_failed=None, | ||||
last_notfound=None, | last_notfound=None, | ||||
last_snapshot=snapshot2, | last_snapshot=snapshot2, | ||||
) | ) | ||||
swh_scheduler.origin_visit_stats_upsert([visit_stats0]) | swh_scheduler.origin_visit_stats_upsert([visit_stats0]) | ||||
actual_visit_stats0 = swh_scheduler.origin_visit_stats_get(url, "git") | pk = (url, "git") | ||||
actual_visit_stats0 = swh_scheduler.origin_visit_stats_get([pk])[pk] | |||||
assert actual_visit_stats0 == visit_stats0 | assert actual_visit_stats0 == visit_stats0 | ||||
visit_stats2 = OriginVisitStats( | visit_stats2 = OriginVisitStats( | ||||
url=url, | url=url, | ||||
visit_type="git", | visit_type="git", | ||||
last_eventful=None, | last_eventful=None, | ||||
last_uneventful=date1, | last_uneventful=date1, | ||||
last_notfound=None, | last_notfound=None, | ||||
last_failed=None, | last_failed=None, | ||||
) | ) | ||||
swh_scheduler.origin_visit_stats_upsert([visit_stats2]) | swh_scheduler.origin_visit_stats_upsert([visit_stats2]) | ||||
actual_visit_stats2 = swh_scheduler.origin_visit_stats_get(url, "git") | actual_visit_stats2 = swh_scheduler.origin_visit_stats_get([pk])[pk] | ||||
assert actual_visit_stats2 == attr.evolve( | assert actual_visit_stats2 == attr.evolve( | ||||
actual_visit_stats0, last_uneventful=date1 | actual_visit_stats0, last_uneventful=date1 | ||||
) | ) | ||||
# a past date, what happens? | # a past date, what happens? | ||||
# date0 < date2 so this ovs should be dismissed | # date0 < date2 so this ovs should be dismissed | ||||
# the "eventful" associated snapshot should be dismissed as well | # the "eventful" associated snapshot should be dismissed as well | ||||
visit_stats1 = OriginVisitStats( | visit_stats1 = OriginVisitStats( | ||||
url=url, | url=url, | ||||
visit_type="git", | visit_type="git", | ||||
last_eventful=date0, | last_eventful=date0, | ||||
last_uneventful=None, | last_uneventful=None, | ||||
last_failed=None, | last_failed=None, | ||||
last_notfound=None, | last_notfound=None, | ||||
last_snapshot=snapshot0, | last_snapshot=snapshot0, | ||||
) | ) | ||||
swh_scheduler.origin_visit_stats_upsert([visit_stats1]) | swh_scheduler.origin_visit_stats_upsert([visit_stats1]) | ||||
actual_visit_stats1 = swh_scheduler.origin_visit_stats_get(url, "git") | actual_visit_stats1 = swh_scheduler.origin_visit_stats_get([pk])[pk] | ||||
assert actual_visit_stats1 == attr.evolve( | assert actual_visit_stats1 == attr.evolve( | ||||
actual_visit_stats2, last_eventful=date2 | actual_visit_stats2, last_eventful=date2 | ||||
) | ) | ||||
def test_origin_visit_stats_upsert_batch(self, swh_scheduler) -> None: | def test_origin_visit_stats_upsert_batch(self, swh_scheduler) -> None: | ||||
"""Batch upsert is ok""" | """Batch upsert is ok""" | ||||
visit_stats = [ | visit_stats = [ | ||||
Show All 14 Lines | def test_origin_visit_stats_upsert_batch(self, swh_scheduler) -> None: | ||||
last_notfound=None, | last_notfound=None, | ||||
last_failed=None, | last_failed=None, | ||||
last_snapshot=hash_to_bytes("fffcc0710eb6cf9efd5b920a8453e1e07157bfff"), | last_snapshot=hash_to_bytes("fffcc0710eb6cf9efd5b920a8453e1e07157bfff"), | ||||
), | ), | ||||
] | ] | ||||
swh_scheduler.origin_visit_stats_upsert(visit_stats) | swh_scheduler.origin_visit_stats_upsert(visit_stats) | ||||
for visit_stat in visit_stats: | visit_stats_dict = swh_scheduler.origin_visit_stats_get( | ||||
assert ( | [(vs.url, vs.visit_type) for vs in visit_stats] | ||||
swh_scheduler.origin_visit_stats_get( | |||||
visit_stat.url, visit_stat.visit_type | |||||
) | |||||
is not None | |||||
) | ) | ||||
for visit_stat in visit_stats: | |||||
pk = (visit_stat.url, visit_stat.visit_type) | |||||
assert pk in visit_stats_dict | |||||
assert visit_stats_dict[pk] is not None | |||||
def test_origin_visit_stats_upsert_cardinality_failing(self, swh_scheduler) -> None: | def test_origin_visit_stats_upsert_cardinality_failing(self, swh_scheduler) -> None: | ||||
"""Batch upsert does not support altering multiple times the same origin-visit-status | """Batch upsert does not support altering multiple times the same origin-visit-status | ||||
""" | """ | ||||
with pytest.raises(SchedulerException, match="CardinalityViolation"): | with pytest.raises(SchedulerException, match="CardinalityViolation"): | ||||
swh_scheduler.origin_visit_stats_upsert( | swh_scheduler.origin_visit_stats_upsert( | ||||
[ | [ | ||||
Show All 20 Lines |