Changeset View
Changeset View
Standalone View
Standalone View
swh/scheduler/tests/test_journal_client.py
Show First 20 Lines • Show All 84 Lines • ▼ Show 20 Lines | visit_statuses = [ | ||||
}, | }, | ||||
] | ] | ||||
process_journal_objects( | process_journal_objects( | ||||
{"origin_visit_status": visit_statuses}, scheduler=swh_scheduler | {"origin_visit_status": visit_statuses}, scheduler=swh_scheduler | ||||
) | ) | ||||
# Ensure those visit status are ignored | # Ensure those visit status are ignored | ||||
for visit_status in visit_statuses: | |||||
actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get( | actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get( | ||||
visit_status["origin"], visit_status["type"] | [(vs["origin"], vs["type"]) for vs in visit_statuses] | ||||
) | ) | ||||
assert actual_origin_visit_stats is None | assert actual_origin_visit_stats == [] | ||||
def test_journal_client_origin_visit_status_from_journal_last_notfound(swh_scheduler): | def test_journal_client_origin_visit_status_from_journal_last_notfound(swh_scheduler): | ||||
visit_status = { | visit_status = { | ||||
"origin": "foo", | "origin": "foo", | ||||
"visit": 1, | "visit": 1, | ||||
"status": "not_found", | "status": "not_found", | ||||
"date": DATE1, | "date": DATE1, | ||||
"type": "git", | "type": "git", | ||||
"snapshot": None, | "snapshot": None, | ||||
} | } | ||||
process_journal_objects( | process_journal_objects( | ||||
{"origin_visit_status": [visit_status]}, scheduler=swh_scheduler | {"origin_visit_status": [visit_status]}, scheduler=swh_scheduler | ||||
) | ) | ||||
actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get("foo", "git") | actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get([("foo", "git")]) | ||||
assert actual_origin_visit_stats == OriginVisitStats( | assert actual_origin_visit_stats == [ | ||||
OriginVisitStats( | |||||
url="foo", | url="foo", | ||||
visit_type="git", | visit_type="git", | ||||
last_eventful=None, | last_eventful=None, | ||||
last_uneventful=None, | last_uneventful=None, | ||||
last_failed=None, | last_failed=None, | ||||
last_notfound=visit_status["date"], | last_notfound=visit_status["date"], | ||||
last_snapshot=None, | last_snapshot=None, | ||||
) | ) | ||||
] | |||||
visit_statuses = [ | visit_statuses = [ | ||||
{ | { | ||||
"origin": "foo", | "origin": "foo", | ||||
"visit": 3, | "visit": 3, | ||||
"status": "not_found", | "status": "not_found", | ||||
"date": DATE2, | "date": DATE2, | ||||
"type": "git", | "type": "git", | ||||
"snapshot": None, | "snapshot": None, | ||||
}, | }, | ||||
{ | { | ||||
"origin": "foo", | "origin": "foo", | ||||
"visit": 4, | "visit": 4, | ||||
"status": "not_found", | "status": "not_found", | ||||
"date": DATE3, | "date": DATE3, | ||||
"type": "git", | "type": "git", | ||||
"snapshot": None, | "snapshot": None, | ||||
}, | }, | ||||
] | ] | ||||
process_journal_objects( | process_journal_objects( | ||||
{"origin_visit_status": visit_statuses}, scheduler=swh_scheduler | {"origin_visit_status": visit_statuses}, scheduler=swh_scheduler | ||||
) | ) | ||||
actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get("foo", "git") | actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get([("foo", "git")]) | ||||
assert actual_origin_visit_stats is not None | assert actual_origin_visit_stats == [OriginVisitStats( | ||||
assert actual_origin_visit_stats == OriginVisitStats( | |||||
url="foo", | url="foo", | ||||
visit_type="git", | visit_type="git", | ||||
last_eventful=None, | last_eventful=None, | ||||
last_uneventful=None, | last_uneventful=None, | ||||
last_failed=None, | last_failed=None, | ||||
last_notfound=DATE3, | last_notfound=DATE3, | ||||
last_snapshot=None, | last_snapshot=None, | ||||
) | )] | ||||
def test_journal_client_origin_visit_status_from_journal_last_failed(swh_scheduler): | def test_journal_client_origin_visit_status_from_journal_last_failed(swh_scheduler): | ||||
visit_statuses = [ | visit_statuses = [ | ||||
{ | { | ||||
"origin": "foo", | "origin": "foo", | ||||
"visit": 1, | "visit": 1, | ||||
"status": "partial", | "status": "partial", | ||||
Show All 26 Lines | visit_statuses = [ | ||||
"snapshot": None, | "snapshot": None, | ||||
}, | }, | ||||
] | ] | ||||
process_journal_objects( | process_journal_objects( | ||||
{"origin_visit_status": visit_statuses}, scheduler=swh_scheduler | {"origin_visit_status": visit_statuses}, scheduler=swh_scheduler | ||||
) | ) | ||||
actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get("bar", "git") | actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get([("bar", "git")]) | ||||
assert actual_origin_visit_stats is not None | assert actual_origin_visit_stats == [OriginVisitStats( | ||||
assert actual_origin_visit_stats == OriginVisitStats( | |||||
url="bar", | url="bar", | ||||
visit_type="git", | visit_type="git", | ||||
last_eventful=None, | last_eventful=None, | ||||
last_uneventful=None, | last_uneventful=None, | ||||
last_failed=DATE3, | last_failed=DATE3, | ||||
last_notfound=None, | last_notfound=None, | ||||
last_snapshot=None, | last_snapshot=None, | ||||
) | )] | ||||
def test_journal_client_origin_visit_status_from_journal_last_eventful(swh_scheduler): | def test_journal_client_origin_visit_status_from_journal_last_eventful(swh_scheduler): | ||||
visit_statuses = [ | visit_statuses = [ | ||||
{ | { | ||||
"origin": "bar", | "origin": "bar", | ||||
"visit": 1, | "visit": 1, | ||||
"status": "partial", | "status": "partial", | ||||
Show All 26 Lines | visit_statuses = [ | ||||
"snapshot": hash_to_bytes("dddcc0710eb6cf9efd5b920a8453e1e07157bddd"), | "snapshot": hash_to_bytes("dddcc0710eb6cf9efd5b920a8453e1e07157bddd"), | ||||
}, | }, | ||||
] | ] | ||||
process_journal_objects( | process_journal_objects( | ||||
{"origin_visit_status": visit_statuses}, scheduler=swh_scheduler | {"origin_visit_status": visit_statuses}, scheduler=swh_scheduler | ||||
) | ) | ||||
actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get("foo", "git") | actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get([("foo", "git")]) | ||||
assert actual_origin_visit_stats is not None | assert actual_origin_visit_stats == [OriginVisitStats( | ||||
assert actual_origin_visit_stats == OriginVisitStats( | |||||
url="foo", | url="foo", | ||||
visit_type="git", | visit_type="git", | ||||
last_eventful=DATE3, | last_eventful=DATE3, | ||||
last_uneventful=None, | last_uneventful=None, | ||||
last_failed=None, | last_failed=None, | ||||
last_notfound=None, | last_notfound=None, | ||||
last_snapshot=hash_to_bytes("dddcc0710eb6cf9efd5b920a8453e1e07157bddd"), | last_snapshot=hash_to_bytes("dddcc0710eb6cf9efd5b920a8453e1e07157bddd"), | ||||
) | )] | ||||
def test_journal_client_origin_visit_status_from_journal_last_uneventful(swh_scheduler): | def test_journal_client_origin_visit_status_from_journal_last_uneventful(swh_scheduler): | ||||
visit_status = { | visit_status = { | ||||
"origin": "foo", | "origin": "foo", | ||||
"visit": 1, | "visit": 1, | ||||
"status": "full", | "status": "full", | ||||
"date": DATE3 + ONE_DAY, | "date": DATE3 + ONE_DAY, | ||||
Show All 16 Lines | swh_scheduler.origin_visit_stats_upsert( | ||||
] | ] | ||||
) | ) | ||||
process_journal_objects( | process_journal_objects( | ||||
{"origin_visit_status": [visit_status]}, scheduler=swh_scheduler | {"origin_visit_status": [visit_status]}, scheduler=swh_scheduler | ||||
) | ) | ||||
actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get( | actual_origin_visit_stats = swh_scheduler.origin_visit_stats_get( | ||||
visit_status["origin"], visit_status["type"] | [(visit_status["origin"], visit_status["type"])] | ||||
) | ) | ||||
assert actual_origin_visit_stats is not None | assert actual_origin_visit_stats == [ | ||||
assert actual_origin_visit_stats == OriginVisitStats( | OriginVisitStats( | ||||
url=visit_status["origin"], | url=visit_status["origin"], | ||||
visit_type=visit_status["type"], | visit_type=visit_status["type"], | ||||
last_eventful=DATE1, | last_eventful=DATE1, | ||||
last_uneventful=visit_status["date"], # most recent date but uneventful | last_uneventful=visit_status["date"], # most recent date but uneventful | ||||
last_failed=DATE2, | last_failed=DATE2, | ||||
last_notfound=DATE1, | last_notfound=DATE1, | ||||
last_snapshot=visit_status["snapshot"], | last_snapshot=visit_status["snapshot"], | ||||
) | ) | ||||
] | |||||
VISIT_STATUSES = [ | VISIT_STATUSES = [ | ||||
{**ovs, "date": DATE1 + n * ONE_DAY} | {**ovs, "date": DATE1 + n * ONE_DAY} | ||||
for n, ovs in enumerate( | for n, ovs in enumerate( | ||||
[ | [ | ||||
{ | { | ||||
"origin": "foo", | "origin": "foo", | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | expected_visit_stats = OriginVisitStats( | ||||
visit_type="git", | visit_type="git", | ||||
last_eventful=DATE1 + ONE_DAY, | last_eventful=DATE1 + ONE_DAY, | ||||
last_uneventful=DATE1 + 3 * ONE_DAY, | last_uneventful=DATE1 + 3 * ONE_DAY, | ||||
last_failed=None, | last_failed=None, | ||||
last_notfound=None, | last_notfound=None, | ||||
last_snapshot=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), | last_snapshot=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), | ||||
) | ) | ||||
assert swh_scheduler.origin_visit_stats_get("foo", "git") == expected_visit_stats | assert swh_scheduler.origin_visit_stats_get([("foo", "git")]) == [ | ||||
expected_visit_stats | |||||
] | |||||
VISIT_STATUSES_1 = [ | VISIT_STATUSES_1 = [ | ||||
{**ovs, "date": DATE1 + n * ONE_DAY} | {**ovs, "date": DATE1 + n * ONE_DAY} | ||||
for n, ovs in enumerate( | for n, ovs in enumerate( | ||||
[ | [ | ||||
{ | { | ||||
"origin": "cavabarder", | "origin": "cavabarder", | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | expected_visit_stats = OriginVisitStats( | ||||
visit_type="hg", | visit_type="hg", | ||||
last_eventful=DATE1 + 2 * ONE_DAY, | last_eventful=DATE1 + 2 * ONE_DAY, | ||||
last_uneventful=DATE1 + 3 * ONE_DAY, | last_uneventful=DATE1 + 3 * ONE_DAY, | ||||
last_failed=None, | last_failed=None, | ||||
last_notfound=None, | last_notfound=None, | ||||
last_snapshot=hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"), | last_snapshot=hash_to_bytes("aaaaaabbbeb6cf9efd5b920a8453e1e07157b6cd"), | ||||
) | ) | ||||
assert ( | assert swh_scheduler.origin_visit_stats_get([("cavabarder", "hg")]) == [ | ||||
swh_scheduler.origin_visit_stats_get("cavabarder", "hg") == expected_visit_stats | expected_visit_stats | ||||
) | ] | ||||
VISIT_STATUSES_2 = [ | VISIT_STATUSES_2 = [ | ||||
{**ovs, "date": DATE1 + n * ONE_DAY} | {**ovs, "date": DATE1 + n * ONE_DAY} | ||||
for n, ovs in enumerate( | for n, ovs in enumerate( | ||||
[ | [ | ||||
{ | { | ||||
"origin": "cavabarder", | "origin": "cavabarder", | ||||
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | ): | ||||
] | ] | ||||
swh_scheduler.record_listed_origins(listed_origins) | swh_scheduler.record_listed_origins(listed_origins) | ||||
before = utcnow() | before = utcnow() | ||||
swh_scheduler.grab_next_visits( | swh_scheduler.grab_next_visits( | ||||
visit_type="git", count=10, policy="oldest_scheduled_first" | visit_type="git", count=10, policy="oldest_scheduled_first" | ||||
) | ) | ||||
after = utcnow() | after = utcnow() | ||||
assert swh_scheduler.origin_visit_stats_get("cavabarder", "hg") is None | assert swh_scheduler.origin_visit_stats_get([("cavabarder", "hg")]) == [] | ||||
assert swh_scheduler.origin_visit_stats_get("cavabarder", "git") is not None | assert swh_scheduler.origin_visit_stats_get([("cavabarder", "git")])[0] is not None | ||||
process_journal_objects( | process_journal_objects( | ||||
{"origin_visit_status": VISIT_STATUSES_2}, scheduler=swh_scheduler | {"origin_visit_status": VISIT_STATUSES_2}, scheduler=swh_scheduler | ||||
) | ) | ||||
for url in ("cavabarder", "iciaussi"): | for url in ("cavabarder", "iciaussi"): | ||||
ovs = swh_scheduler.origin_visit_stats_get(url, "git") | ovs = swh_scheduler.origin_visit_stats_get([(url, "git")])[0] | ||||
assert before <= ovs.last_scheduled <= after | assert before <= ovs.last_scheduled <= after | ||||
ovs = swh_scheduler.origin_visit_stats_get(url, "hg") | ovs = swh_scheduler.origin_visit_stats_get([(url, "hg")])[0] | ||||
assert ovs.last_scheduled is None | assert ovs.last_scheduled is None | ||||
ovs = swh_scheduler.origin_visit_stats_get("cavabarder", "git") | ovs = swh_scheduler.origin_visit_stats_get([("cavabarder", "git")])[0] | ||||
assert ovs.last_eventful == DATE1 + 5 * ONE_DAY | assert ovs.last_eventful == DATE1 + 5 * ONE_DAY | ||||
assert ovs.last_uneventful is None | assert ovs.last_uneventful is None | ||||
assert ovs.last_failed is None | assert ovs.last_failed is None | ||||
assert ovs.last_notfound is None | assert ovs.last_notfound is None | ||||
assert ovs.last_snapshot == hash_to_bytes( | assert ovs.last_snapshot == hash_to_bytes( | ||||
"5555555555555555555555555555555555555555" | "5555555555555555555555555555555555555555" | ||||
) | ) |