Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/algos/test_origin.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import pytest | import pytest | ||||
from unittest.mock import patch | from unittest.mock import patch | ||||
from swh.model.model import Origin, OriginVisit, OriginVisitStatus | from swh.model.model import Origin, OriginVisit, OriginVisitStatus | ||||
from swh.storage.algos.origin import iter_origins, origin_get_latest_visit_status | from swh.storage.algos.origin import iter_origins, origin_get_latest_visit_status | ||||
from swh.storage.utils import now | from swh.storage.utils import now | ||||
from swh.storage.tests.test_storage import round_to_milliseconds | from swh.storage.tests.test_storage import round_to_milliseconds | ||||
from swh.storage.tests.storage_data import data | |||||
def assert_list_eq(left, right, msg=None):
    """Assert that two iterables produce the same sequence of elements.

    Both arguments are materialized with ``list`` before being compared, so
    any iterable (list, tuple, generator, ...) is accepted and only the
    elements and their order matter, not the container type.

    Args:
        left: first iterable to compare.
        right: second iterable to compare.
        msg: optional message attached to the AssertionError on mismatch.

    Raises:
        AssertionError: if the two materialized lists differ.
    """
    assert list(left) == list(right), msg
@pytest.fixture | @pytest.fixture | ||||
def swh_storage_backend_config(): | def swh_storage_backend_config(): | ||||
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines | |||||
def test_origin_get_latest_visit_status_none(swh_storage, sample_data): | def test_origin_get_latest_visit_status_none(swh_storage, sample_data): | ||||
"""Looking up unknown objects should return nothing | """Looking up unknown objects should return nothing | ||||
""" | """ | ||||
# unknown origin so no result | # unknown origin so no result | ||||
assert origin_get_latest_visit_status(swh_storage, "unknown-origin") is None | assert origin_get_latest_visit_status(swh_storage, "unknown-origin") is None | ||||
# unknown type so no result | # unknown type so no result | ||||
origin = sample_data["origin"][0] | origin = sample_data.origin | ||||
origin_visit = sample_data["origin_visit"][0] | origin_visit = sample_data.origin_visit | ||||
assert origin_visit.origin == origin.url | assert origin_visit.origin == origin.url | ||||
swh_storage.origin_add([origin]) | swh_storage.origin_add([origin]) | ||||
swh_storage.origin_visit_add([origin_visit])[0] | swh_storage.origin_visit_add([origin_visit])[0] | ||||
assert origin_visit.type != "unknown" | assert origin_visit.type != "unknown" | ||||
actual_origin_visit = origin_get_latest_visit_status( | actual_origin_visit = origin_get_latest_visit_status( | ||||
swh_storage, origin.url, type="unknown" | swh_storage, origin.url, type="unknown" | ||||
) | ) | ||||
Show All 9 Lines | def test_origin_get_latest_visit_status_none(swh_storage, sample_data): | ||||
) | ) | ||||
assert actual_origin_visit is None | assert actual_origin_visit is None | ||||
def init_storage_with_origin_visits(swh_storage, sample_data): | def init_storage_with_origin_visits(swh_storage, sample_data): | ||||
"""Initialize storage with origin/origin-visit/origin-visit-status | """Initialize storage with origin/origin-visit/origin-visit-status | ||||
""" | """ | ||||
snapshot = sample_data["snapshot"][2] | snapshot = sample_data.snapshots[2] | ||||
origin1, origin2 = sample_data["origin"][:2] | origin1, origin2 = sample_data.origins[:2] | ||||
swh_storage.origin_add([origin1, origin2]) | swh_storage.origin_add([origin1, origin2]) | ||||
ov1, ov2 = swh_storage.origin_visit_add( | ov1, ov2 = swh_storage.origin_visit_add( | ||||
[ | [ | ||||
OriginVisit( | OriginVisit( | ||||
origin=origin1.url, date=data.date_visit1, type=data.type_visit1, | origin=origin1.url, | ||||
date=sample_data.date_visit1, | |||||
type=sample_data.type_visit1, | |||||
), | ), | ||||
OriginVisit( | OriginVisit( | ||||
origin=origin2.url, date=data.date_visit2, type=data.type_visit2, | origin=origin2.url, | ||||
date=sample_data.date_visit2, | |||||
type=sample_data.type_visit2, | |||||
), | ), | ||||
] | ] | ||||
) | ) | ||||
swh_storage.snapshot_add([snapshot]) | swh_storage.snapshot_add([snapshot]) | ||||
date_now = now() | date_now = now() | ||||
date_now = round_to_milliseconds(date_now) | date_now = round_to_milliseconds(date_now) | ||||
assert data.date_visit1 < data.date_visit2 | assert sample_data.date_visit1 < sample_data.date_visit2 | ||||
assert data.date_visit2 < date_now | assert sample_data.date_visit2 < date_now | ||||
# origin visit status 1 for origin visit 1 | # origin visit status 1 for origin visit 1 | ||||
ovs11 = OriginVisitStatus( | ovs11 = OriginVisitStatus( | ||||
origin=origin1.url, | origin=origin1.url, | ||||
visit=ov1.visit, | visit=ov1.visit, | ||||
date=data.date_visit1, | date=sample_data.date_visit1, | ||||
status="partial", | status="partial", | ||||
snapshot=None, | snapshot=None, | ||||
) | ) | ||||
# origin visit status 2 for origin visit 1 | # origin visit status 2 for origin visit 1 | ||||
ovs12 = OriginVisitStatus( | ovs12 = OriginVisitStatus( | ||||
origin=origin1.url, | origin=origin1.url, | ||||
visit=ov1.visit, | visit=ov1.visit, | ||||
date=data.date_visit2, | date=sample_data.date_visit2, | ||||
status="ongoing", | status="ongoing", | ||||
snapshot=None, | snapshot=None, | ||||
) | ) | ||||
# origin visit status 1 for origin visit 2 | # origin visit status 1 for origin visit 2 | ||||
ovs21 = OriginVisitStatus( | ovs21 = OriginVisitStatus( | ||||
origin=origin2.url, | origin=origin2.url, | ||||
visit=ov2.visit, | visit=ov2.visit, | ||||
date=data.date_visit2, | date=sample_data.date_visit2, | ||||
status="ongoing", | status="ongoing", | ||||
snapshot=None, | snapshot=None, | ||||
) | ) | ||||
# origin visit status 2 for origin visit 2 | # origin visit status 2 for origin visit 2 | ||||
ovs22 = OriginVisitStatus( | ovs22 = OriginVisitStatus( | ||||
origin=origin2.url, | origin=origin2.url, | ||||
visit=ov2.visit, | visit=ov2.visit, | ||||
date=date_now, | date=date_now, | ||||
Show All 16 Lines | def test_origin_get_latest_visit_status_filter_type(swh_storage, sample_data): | ||||
""" | """ | ||||
objects = init_storage_with_origin_visits(swh_storage, sample_data) | objects = init_storage_with_origin_visits(swh_storage, sample_data) | ||||
origin1, origin2 = objects["origin"] | origin1, origin2 = objects["origin"] | ||||
ov1, ov2 = objects["origin_visit"] | ov1, ov2 = objects["origin_visit"] | ||||
ovs11, ovs12, _, ovs22 = objects["origin_visit_status"] | ovs11, ovs12, _, ovs22 = objects["origin_visit_status"] | ||||
# no visit for origin1 url with type_visit2 | # no visit for origin1 url with type_visit2 | ||||
assert ( | assert ( | ||||
origin_get_latest_visit_status(swh_storage, origin1.url, type=data.type_visit2) | origin_get_latest_visit_status( | ||||
swh_storage, origin1.url, type=sample_data.type_visit2 | |||||
) | |||||
is None | is None | ||||
) | ) | ||||
# no visit for origin2 url with type_visit1 | # no visit for origin2 url with type_visit1 | ||||
assert ( | assert ( | ||||
origin_get_latest_visit_status(swh_storage, origin2.url, type=data.type_visit1) | origin_get_latest_visit_status( | ||||
swh_storage, origin2.url, type=sample_data.type_visit1 | |||||
) | |||||
is None | is None | ||||
) | ) | ||||
# Two visits, both with no snapshot, take the most recent | # Two visits, both with no snapshot, take the most recent | ||||
actual_ov1, actual_ovs12 = origin_get_latest_visit_status( | actual_ov1, actual_ovs12 = origin_get_latest_visit_status( | ||||
swh_storage, origin1.url, type=data.type_visit1 | swh_storage, origin1.url, type=sample_data.type_visit1 | ||||
) | ) | ||||
assert isinstance(actual_ov1, OriginVisit) | assert isinstance(actual_ov1, OriginVisit) | ||||
assert isinstance(actual_ovs12, OriginVisitStatus) | assert isinstance(actual_ovs12, OriginVisitStatus) | ||||
assert actual_ov1.origin == ov1.origin | assert actual_ov1.origin == ov1.origin | ||||
assert actual_ov1.visit == ov1.visit | assert actual_ov1.visit == ov1.visit | ||||
assert actual_ov1.type == data.type_visit1 | assert actual_ov1.type == sample_data.type_visit1 | ||||
assert actual_ovs12 == ovs12 | assert actual_ovs12 == ovs12 | ||||
# take the most recent visit with type_visit2 | # take the most recent visit with type_visit2 | ||||
actual_ov2, actual_ovs22 = origin_get_latest_visit_status( | actual_ov2, actual_ovs22 = origin_get_latest_visit_status( | ||||
swh_storage, origin2.url, type=data.type_visit2 | swh_storage, origin2.url, type=sample_data.type_visit2 | ||||
) | ) | ||||
assert isinstance(actual_ov2, OriginVisit) | assert isinstance(actual_ov2, OriginVisit) | ||||
assert isinstance(actual_ovs22, OriginVisitStatus) | assert isinstance(actual_ovs22, OriginVisitStatus) | ||||
assert actual_ov2.origin == ov2.origin | assert actual_ov2.origin == ov2.origin | ||||
assert actual_ov2.visit == ov2.visit | assert actual_ov2.visit == ov2.visit | ||||
assert actual_ov2.type == data.type_visit2 | assert actual_ov2.type == sample_data.type_visit2 | ||||
assert actual_ovs22 == ovs22 | assert actual_ovs22 == ovs22 | ||||
def test_origin_get_latest_visit_status_filter_status(swh_storage, sample_data):
    """Check the ``allowed_statuses`` filter of origin_get_latest_visit_status.

    The storage is populated through init_storage_with_origin_visits. For the
    visit of origin1 that helper creates a "partial" status (ovs11) followed
    by an "ongoing" one (ovs12). For origin2, this test expects no "failed"
    status to exist and ovs22 to be the answer when filtering on "full".
    """
    objects = init_storage_with_origin_visits(swh_storage, sample_data)
    origin1, origin2 = objects["origin"]
    ov1, ov2 = objects["origin_visit"]
    ovs11, ovs12, _, ovs22 = objects["origin_visit_status"]

    # no failed status for that visit
    assert (
        origin_get_latest_visit_status(
            swh_storage, origin2.url, allowed_statuses=["failed"]
        )
        is None
    )

    # only 1 partial for that visit
    actual_ov1, actual_ovs11 = origin_get_latest_visit_status(
        swh_storage, origin1.url, allowed_statuses=["partial"]
    )
    assert actual_ov1.origin == ov1.origin
    assert actual_ov1.visit == ov1.visit
    assert actual_ov1.type == sample_data.type_visit1
    assert actual_ovs11 == ovs11

    # both status exist, take the latest one
    actual_ov1, actual_ovs12 = origin_get_latest_visit_status(
        swh_storage, origin1.url, allowed_statuses=["partial", "ongoing"]
    )
    assert isinstance(actual_ov1, OriginVisit)
    assert isinstance(actual_ovs12, OriginVisitStatus)
    assert actual_ov1.origin == ov1.origin
    assert actual_ov1.visit == ov1.visit
    assert actual_ov1.type == sample_data.type_visit1
    assert actual_ovs12 == ovs12

    # restricting to "full" statuses must return ovs22 for origin2
    actual_ov2, actual_ovs22 = origin_get_latest_visit_status(
        swh_storage, origin2.url, allowed_statuses=["full"]
    )
    assert actual_ov2.origin == ov2.origin
    assert actual_ov2.visit == ov2.visit
    assert actual_ov2.type == sample_data.type_visit2
    assert actual_ovs22 == ovs22
def test_origin_get_latest_visit_status_filter_snapshot(swh_storage, sample_data): | def test_origin_get_latest_visit_status_filter_snapshot(swh_storage, sample_data): | ||||
objects = init_storage_with_origin_visits(swh_storage, sample_data) | objects = init_storage_with_origin_visits(swh_storage, sample_data) | ||||
origin1, origin2 = objects["origin"] | origin1, origin2 = objects["origin"] | ||||
_, ov2 = objects["origin_visit"] | _, ov2 = objects["origin_visit"] | ||||
_, _, _, ovs22 = objects["origin_visit_status"] | _, _, _, ovs22 = objects["origin_visit_status"] | ||||
Show All 12 Lines | def test_origin_get_latest_visit_status_filter_snapshot(swh_storage, sample_data): | ||||
assert actual_ov2.visit == ov2.visit | assert actual_ov2.visit == ov2.visit | ||||
assert actual_ov2.type == ov2.type | assert actual_ov2.type == ov2.type | ||||
assert actual_ovs22 == ovs22 | assert actual_ovs22 == ovs22 | ||||
date_now = now() | date_now = now() | ||||
# Add another visit | # Add another visit | ||||
swh_storage.origin_visit_add( | swh_storage.origin_visit_add( | ||||
[OriginVisit(origin=origin2.url, date=date_now, type=data.type_visit2,),] | [OriginVisit(origin=origin2.url, date=date_now, type=sample_data.type_visit2,),] | ||||
) | ) | ||||
# Requiring the latest visit with a snapshot, we still find the previous visit | # Requiring the latest visit with a snapshot, we still find the previous visit | ||||
ov2, ovs22 = origin_get_latest_visit_status( | ov2, ovs22 = origin_get_latest_visit_status( | ||||
swh_storage, origin2.url, require_snapshot=True | swh_storage, origin2.url, require_snapshot=True | ||||
) | ) | ||||
assert actual_ov2.origin == ov2.origin | assert actual_ov2.origin == ov2.origin | ||||
assert actual_ov2.visit == ov2.visit | assert actual_ov2.visit == ov2.visit | ||||
assert actual_ov2.type == ov2.type | assert actual_ov2.type == ov2.type | ||||
assert actual_ovs22 == ovs22 | assert actual_ovs22 == ovs22 |