Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/algos/test_origin.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import pytest | |||||
from unittest.mock import patch | |||||
from swh.model.model import Origin, OriginVisit, OriginVisitStatus | from swh.model.model import Origin, OriginVisit, OriginVisitStatus | ||||
from swh.storage.algos.origin import ( | from swh.storage.algos.origin import ( | ||||
iter_origins, | iter_origins, | ||||
origin_get_latest_visit_status, | origin_get_latest_visit_status, | ||||
iter_origin_visits, | iter_origin_visits, | ||||
iter_origin_visit_statuses, | iter_origin_visit_statuses, | ||||
) | ) | ||||
from swh.storage.interface import ListOrder | from swh.storage.interface import ListOrder | ||||
from swh.storage.utils import now | from swh.storage.utils import now | ||||
from swh.storage.tests.test_storage import round_to_milliseconds | from swh.storage.tests.test_storage import round_to_milliseconds | ||||
def assert_list_eq(left, right, msg=None):
    """Assert that two iterables yield equal items in the same order.

    Both arguments are materialized with ``list`` before the comparison, so
    generators and other one-shot iterables are accepted. ``msg`` is used as
    the assertion message when the two lists differ.
    """
    left_items = list(left)
    right_items = list(right)
    assert left_items == right_items, msg
@pytest.fixture
def swh_storage_backend_config():
    """Yield a storage configuration dict that selects the ``memory`` class."""
    backend_config = dict(cls="memory")
    yield backend_config
def test_iter_origins(swh_storage): | def test_iter_origins(swh_storage): | ||||
origins = [ | origins = [ | ||||
Origin(url="bar"), | Origin(url="bar"), | ||||
Origin(url="qux"), | Origin(url="qux"), | ||||
Origin(url="quuz"), | Origin(url="quuz"), | ||||
] | ] | ||||
assert swh_storage.origin_add(origins) == {"origin:add": 3} | assert swh_storage.origin_add(origins) == {"origin:add": 3} | ||||
assert_list_eq(iter_origins(swh_storage), origins) | assert_list_eq(iter_origins(swh_storage), origins) | ||||
Show All 30 Lines | for i in range(1, 5): | ||||
iter_origins( | iter_origins( | ||||
swh_storage, origin_from=i + 1, origin_to=j + 1, batch_size=2 | swh_storage, origin_from=i + 1, origin_to=j + 1, batch_size=2 | ||||
), | ), | ||||
origins[i:j], | origins[i:j], | ||||
(i, j), | (i, j), | ||||
) | ) | ||||
@patch("swh.storage.in_memory.InMemoryStorage.origin_get_range")
def test_iter_origins_batch_size(mock_origin_get_range, swh_storage):
    """Check the ``origin_count`` that iter_origins hands to origin_get_range.

    Without an explicit ``batch_size`` the patched method is expected to be
    called with ``origin_count=10000``; with ``batch_size=42`` it is expected
    to be called with ``origin_count=42``.
    """
    mock_origin_get_range.return_value = []

    # (extra keyword arguments for iter_origins, expected origin_count)
    scenarios = (
        ({}, 10000),
        ({"batch_size": 42}, 42),
    )
    for extra_kwargs, expected_count in scenarios:
        # Drain the iterator before inspecting the mock's last call.
        list(iter_origins(swh_storage, **extra_kwargs))
        mock_origin_get_range.assert_called_with(
            origin_from=1, origin_count=expected_count
        )
def test_origin_get_latest_visit_status_none(swh_storage, sample_data): | def test_origin_get_latest_visit_status_none(swh_storage, sample_data): | ||||
"""Looking up unknown objects should return nothing | """Looking up unknown objects should return nothing | ||||
""" | """ | ||||
# unknown origin so no result | # unknown origin so no result | ||||
assert origin_get_latest_visit_status(swh_storage, "unknown-origin") is None | assert origin_get_latest_visit_status(swh_storage, "unknown-origin") is None | ||||
# unknown type so no result | # unknown type so no result | ||||
Show All 9 Lines | def test_origin_get_latest_visit_status_none(swh_storage, sample_data): | ||||
) | ) | ||||
assert actual_origin_visit is None | assert actual_origin_visit is None | ||||
actual_origin_visit = origin_get_latest_visit_status( | actual_origin_visit = origin_get_latest_visit_status( | ||||
swh_storage, origin.url, require_snapshot=True | swh_storage, origin.url, require_snapshot=True | ||||
) | ) | ||||
assert actual_origin_visit is None | assert actual_origin_visit is None | ||||
actual_origin_visit = origin_get_latest_visit_status( | |||||
swh_storage, origin.url, allowed_statuses=["unknown"] | |||||
) | |||||
assert actual_origin_visit is None | |||||
def init_storage_with_origin_visits(swh_storage, sample_data): | def init_storage_with_origin_visits(swh_storage, sample_data): | ||||
"""Initialize storage with origin/origin-visit/origin-visit-status | """Initialize storage with origin/origin-visit/origin-visit-status | ||||
""" | """ | ||||
snapshot = sample_data.snapshots[2] | snapshot = sample_data.snapshots[2] | ||||
origin1, origin2 = sample_data.origins[:2] | origin1, origin2 = sample_data.origins[:2] | ||||
swh_storage.origin_add([origin1, origin2]) | swh_storage.origin_add([origin1, origin2]) | ||||
Show All 19 Lines | def init_storage_with_origin_visits(swh_storage, sample_data): | ||||
date_now = round_to_milliseconds(date_now) | date_now = round_to_milliseconds(date_now) | ||||
assert sample_data.date_visit1 < sample_data.date_visit2 | assert sample_data.date_visit1 < sample_data.date_visit2 | ||||
assert sample_data.date_visit2 < date_now | assert sample_data.date_visit2 < date_now | ||||
# origin visit status 1 for origin visit 1 | # origin visit status 1 for origin visit 1 | ||||
ovs11 = OriginVisitStatus( | ovs11 = OriginVisitStatus( | ||||
origin=origin1.url, | origin=origin1.url, | ||||
visit=ov1.visit, | visit=ov1.visit, | ||||
date=sample_data.date_visit1, | date=ov1.date + datetime.timedelta(seconds=10), # so it's not ignored | ||||
status="partial", | status="partial", | ||||
snapshot=None, | snapshot=None, | ||||
) | ) | ||||
# origin visit status 2 for origin visit 1 | # origin visit status 2 for origin visit 1 | ||||
ovs12 = OriginVisitStatus( | ovs12 = OriginVisitStatus( | ||||
origin=origin1.url, | origin=origin1.url, | ||||
visit=ov1.visit, | visit=ov1.visit, | ||||
date=sample_data.date_visit2, | date=sample_data.date_visit2, | ||||
status="ongoing", | status="ongoing", | ||||
snapshot=None, | snapshot=None, | ||||
) | ) | ||||
# origin visit status 1 for origin visit 2 | # origin visit status 1 for origin visit 2 | ||||
ovs21 = OriginVisitStatus( | ovs21 = OriginVisitStatus( | ||||
origin=origin2.url, | origin=origin2.url, | ||||
visit=ov2.visit, | visit=ov2.visit, | ||||
date=sample_data.date_visit2, | date=ov2.date + datetime.timedelta(seconds=10), # so it's not ignored | ||||
ardumont: ^ those are ignored with pg implem because they end up being the same id (origin, visit, date)… | |||||
status="ongoing", | status="ongoing", | ||||
snapshot=None, | snapshot=None, | ||||
) | ) | ||||
# origin visit status 2 for origin visit 2 | # origin visit status 2 for origin visit 2 | ||||
ovs22 = OriginVisitStatus( | ovs22 = OriginVisitStatus( | ||||
origin=origin2.url, | origin=origin2.url, | ||||
visit=ov2.visit, | visit=ov2.visit, | ||||
date=date_now, | date=date_now, | ||||
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines | |||||
def test_origin_get_latest_visit_status_filter_status(swh_storage, sample_data): | def test_origin_get_latest_visit_status_filter_status(swh_storage, sample_data): | ||||
objects = init_storage_with_origin_visits(swh_storage, sample_data) | objects = init_storage_with_origin_visits(swh_storage, sample_data) | ||||
origin1, origin2 = objects["origin"] | origin1, origin2 = objects["origin"] | ||||
ov1, ov2 = objects["origin_visit"] | ov1, ov2 = objects["origin_visit"] | ||||
ovs11, ovs12, _, ovs22 = objects["origin_visit_status"] | ovs11, ovs12, _, ovs22 = objects["origin_visit_status"] | ||||
# no failed status for that visit | # no partial status for that origin visit | ||||
assert ( | assert ( | ||||
origin_get_latest_visit_status( | origin_get_latest_visit_status( | ||||
swh_storage, origin2.url, allowed_statuses=["failed"] | swh_storage, origin2.url, allowed_statuses=["partial"] | ||||
) | ) | ||||
is None | is None | ||||
) | ) | ||||
# only 1 partial for that visit | # only 1 partial for that visit | ||||
actual_ov1, actual_ovs11 = origin_get_latest_visit_status( | actual_ov1, actual_ovs11 = origin_get_latest_visit_status( | ||||
swh_storage, origin1.url, allowed_statuses=["partial"] | swh_storage, origin1.url, allowed_statuses=["partial"] | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 126 Lines • ▼ Show 20 Lines | for i in range(20): | ||||
origin=origin1.url, | origin=origin1.url, | ||||
visit=ov1.visit, | visit=ov1.visit, | ||||
date=status_date, | date=status_date, | ||||
status="created", | status="created", | ||||
snapshot=None, | snapshot=None, | ||||
) | ) | ||||
) | ) | ||||
visit_statuses = swh_storage.origin_visit_add(new_visit_statuses) | swh_storage.origin_visit_status_add(new_visit_statuses) | ||||
reversed_visit_statuses = list(reversed(visit_statuses)) | reversed_visit_statuses = list(reversed(new_visit_statuses)) | ||||
ardumont (inline comment, marked done): This endpoint was not the correct one... ¯\_(ツ)_/¯
The new one does not return anything.
| |||||
# order asc | # order asc | ||||
actual_visit_statuses = list( | actual_visit_statuses = list( | ||||
iter_origin_visit_statuses(swh_storage, ov1.origin, ov1.visit) | iter_origin_visit_statuses(swh_storage, ov1.origin, ov1.visit) | ||||
) | ) | ||||
assert actual_visit_statuses == visit_statuses | assert actual_visit_statuses == new_visit_statuses | ||||
# order desc | # order desc | ||||
actual_visit_statuses = list( | actual_visit_statuses = list( | ||||
iter_origin_visit_statuses( | iter_origin_visit_statuses( | ||||
swh_storage, ov1.origin, ov1.visit, order=ListOrder.DESC | swh_storage, ov1.origin, ov1.visit, order=ListOrder.DESC | ||||
) | ) | ||||
) | ) | ||||
assert actual_visit_statuses == reversed_visit_statuses | assert actual_visit_statuses == reversed_visit_statuses | ||||
# no result | # no result | ||||
actual_visit_statuses = list( | actual_visit_statuses = list( | ||||
iter_origin_visit_statuses(swh_storage, origin2.url, ov1.visit) | iter_origin_visit_statuses(swh_storage, origin2.url, ov1.visit) | ||||
) | ) | ||||
assert actual_visit_statuses == [] | assert actual_visit_statuses == [] |
^ Those are ignored by the pg (PostgreSQL) implementation because they end up with the same id (origin, visit, date) as the visit status created by origin-visit-add.