Changeset View
Standalone View
swh/web/tests/common/test_origin_visits.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import timedelta | |||||
from hypothesis import given | from hypothesis import given | ||||
import pytest | import pytest | ||||
from swh.model.hashutil import hash_to_hex | from swh.model.hashutil import hash_to_hex | ||||
from swh.model.model import OriginVisit, OriginVisitStatus | |||||
from swh.storage.utils import now | |||||
from swh.web.common.exc import NotFoundExc | from swh.web.common.exc import NotFoundExc | ||||
from swh.web.common.origin_visits import get_origin_visits, get_origin_visit | from swh.web.common.origin_visits import get_origin_visits, get_origin_visit | ||||
from swh.web.tests.strategies import new_snapshots | from swh.web.common.typing import OriginInfo | ||||
from swh.web.tests.strategies import new_origin, new_snapshots | |||||
@given(new_snapshots(3)) | @given(new_snapshots(3)) | ||||
def test_get_origin_visits(mocker, snapshots): | def test_get_origin_visits(mocker, snapshots): | ||||
mock_service = mocker.patch("swh.web.common.service") | mock_service = mocker.patch("swh.web.common.service") | ||||
mock_service.MAX_LIMIT = 2 | mock_service.MAX_LIMIT = 2 | ||||
def _lookup_origin_visits(*args, **kwargs): | def _lookup_origin_visits(*args, **kwargs): | ||||
▲ Show 20 Lines • Show All 113 Lines • ▼ Show 20 Lines | def test_get_origin_visit(mocker, snapshots): | ||||
visit = get_origin_visit(origin_info, visit_ts="2014-01-01") | visit = get_origin_visit(origin_info, visit_ts="2014-01-01") | ||||
assert visit == visits[0] | assert visit == visits[0] | ||||
visit = get_origin_visit(origin_info, visit_ts="2018-01-01") | visit = get_origin_visit(origin_info, visit_ts="2018-01-01") | ||||
assert visit == visits[-1] | assert visit == visits[-1] | ||||
def test_get_origin_visit_latest_valid_snapshot(mocker): | @given(new_origin(), new_snapshots(6)) | ||||
mock_origin_visits = mocker.patch("swh.web.common.origin_visits.get_origin_visits") | def test_get_origin_visit_return_first_valid_full_visit( | ||||
origin_info = { | archive_data, new_origin, new_snapshots | ||||
"url": "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json", | ): | ||||
} | visits = [] | ||||
visits = [ | archive_data.origin_add([new_origin]) | ||||
{ | # create 6 visits, the first three have full status while the | ||||
"date": "2020-04-15T12:42:52.936520+00:00", | # last three have partial status and set a null snapshot for | ||||
"origin": origin_info["url"], | # the last four visits | ||||
"snapshot": "d820451681c74eec63693b6ea4e4b8417c76bb7a", | for i, snp in enumerate(new_snapshots): | ||||
"status": "partial", | visit_date = now() + timedelta(days=i * 10) | ||||
"type": "nixguix", | visit = archive_data.origin_visit_add( | ||||
"visit": 16, | [OriginVisit(origin=new_origin.url, date=visit_date, type="git",)] | ||||
}, | )[0] | ||||
{ | archive_data.snapshot_add([new_snapshots[i]]) | ||||
"date": "2020-04-17T17:25:13.738789+00:00", | visit_status = OriginVisitStatus( | ||||
"origin": origin_info["url"], | origin=new_origin.url, | ||||
"snapshot": "d20627c1ae2b5e553e8adcf625f37e37cc5190dd", | visit=visit.visit, | ||||
"status": "partial", | date=visit_date + timedelta(minutes=5), | ||||
"type": "nixguix", | status="full" if i < 3 else "partial", | ||||
"visit": 17, | snapshot=new_snapshots[i].id if i < 2 else None, | ||||
}, | ) | ||||
{ | if i < 2: | ||||
"date": "2020-04-19T19:02:42.906079+00:00", | archive_data.origin_visit_status_add([visit_status]) | ||||
"origin": origin_info["url"], | visits.append(visit.visit) | ||||
"snapshot": None, | |||||
"status": "partial", | # should return the second visit | ||||
"type": "nixguix", | expected_visit = archive_data.origin_visit_get_by(new_origin.url, visits[1]) | ||||
"visit": 18, | assert get_origin_visit((OriginInfo(url=new_origin.url))) == expected_visit | ||||
}, | |||||
{ | |||||
"date": "2020-04-20T12:43:41.120422+00:00", | @given(new_origin(), new_snapshots(6)) | ||||
"origin": origin_info["url"], | def test_get_origin_visit_non_resolvable_snapshots( | ||||
"snapshot": None, | archive_data, new_origin, new_snapshots | ||||
"status": "partial", | ): | ||||
"type": "nixguix", | visits = [] | ||||
"visit": 19, | archive_data.origin_add([new_origin]) | ||||
}, | # create 6 full visits, the first three have resolvable snapshots | ||||
{ | # while the last three have non resolvable snapshots | ||||
"date": "2020-04-20T12:46:40.255418+00:00", | for i, snp in enumerate(new_snapshots): | ||||
"origin": origin_info["url"], | visit_date = now() + timedelta(days=i * 10) | ||||
"snapshot": None, | visit = archive_data.origin_visit_add( | ||||
"status": "partial", | [OriginVisit(origin=new_origin.url, date=visit_date, type="git",)] | ||||
"type": "nixguix", | )[0] | ||||
"visit": 20, | archive_data.snapshot_add([new_snapshots[i]]) | ||||
}, | visit_status = OriginVisitStatus( | ||||
] | origin=new_origin.url, | ||||
visit=visit.visit, | |||||
mock_origin_visits.return_value = visits | date=visit_date + timedelta(minutes=5), | ||||
status="full", | |||||
visit = get_origin_visit(origin_info) | snapshot=new_snapshots[i].id, | ||||
) | |||||
assert visit["snapshot"] is not None | if i < 3: | ||||
assert visit["visit"] == 17 | archive_data.origin_visit_status_add([visit_status]) | ||||
visits.append(visit.visit) | |||||
# should return the third visit | |||||
expected_visit = archive_data.origin_visit_get_by(new_origin.url, visits[2]) | |||||
assert get_origin_visit((OriginInfo(url=new_origin.url))) == expected_visit | |||||
@given(new_origin(), new_snapshots(6)) | |||||
def test_get_origin_visit_return_first_valid_partial_visit( | |||||
archive_data, new_origin, new_snapshots | |||||
): | |||||
visits = [] | |||||
archive_data.origin_add([new_origin]) | |||||
# create 6 visits, the first three have full status but null snapshot | |||||
# while the last three have partial status with valid snapshot | |||||
for i, snp in enumerate(new_snapshots): | |||||
visit_date = now() + timedelta(days=i * 10) | |||||
visit = archive_data.origin_visit_add( | |||||
[OriginVisit(origin=new_origin.url, date=visit_date, type="git",)] | |||||
)[0] | |||||
archive_data.snapshot_add([new_snapshots[i]]) | |||||
visit_status = OriginVisitStatus( | |||||
origin=new_origin.url, | |||||
visit=visit.visit, | |||||
date=visit_date + timedelta(minutes=5), | |||||
status="full" if i < 3 else "partial", | |||||
ardumont: That does not really test the second part (fallback on partial visit with a snapshot ;).
For… | |||||
Not Done Inline Actions
I mean it apparently is as you reverse it... Also, it kinda feels redundant with the ardumont: > Also, I don't know if the list is already ordered or not.
I mean it apparently is as you… | |||||
Done Inline Actions: Indeed, this is the same processing, but that visit-picking strategy has been in place for a long time in swh-web, and the origin_get_latest_visit_status function landed recently in storage, so I did not really have it in mind. I think I might be able to use it in the get_origin_visit function. anlambert: Indeed, this is the same processing, but that visit-picking strategy has been in place for a long… | |||||
Done Inline Actions: I think the best option here is to rewrite the tests using hypothesis to avoid hardcoding visit data and mocking functions. anlambert: I think the best option here is to rewrite the tests using hypothesis to avoid hardcoding… | |||||
snapshot=new_snapshots[i].id if i > 2 else None, | |||||
) | |||||
if i > 2: | |||||
archive_data.origin_visit_status_add([visit_status]) | |||||
visits.append(visit.visit) | |||||
# should return the last visit | |||||
expected_visit = archive_data.origin_visit_get_by(new_origin.url, visits[-1]) | |||||
assert get_origin_visit((OriginInfo(url=new_origin.url))) == expected_visit |
That does not really test the second part (fallback on a partial visit with a snapshot) ;)
For that, maybe make a first pass on your local visits list without the visit id 3.
That should then find the partial visit with id 6 (if I'm not mistaken).
Then append visit id 3 to your list.
Trigger a search again which should be the current scenario ;)
Also, I don't know if the list is already ordered or not.
What do you think?