diff --git a/swh/web/common/origin_visits.py b/swh/web/common/origin_visits.py --- a/swh/web/common/origin_visits.py +++ b/swh/web/common/origin_visits.py @@ -18,6 +18,9 @@ That list is put in cache in order to speedup the navigation in the swh web browse ui. + The returned visits are sorted according to their date in + ascending order. + Args: origin_info: dict describing the origin to fetch visits from @@ -85,21 +88,53 @@ snapshot_id: Optional[str] = None, ) -> OriginVisitInfo: """Function that returns information about a visit for a given origin. - The visit is retrieved from a provided timestamp. - The closest visit from that timestamp is selected. + + If a timestamp is provided, the closest visit from that + timestamp is returned. + + If a snapshot identifier is provided, the first visit with that snapshot + is returned. + + If no search hints are provided, return the most recent full visit with + a valid snapshot or the most recent partial visit with a valid snapshot + otherwise. Args: origin_info: a dict filled with origin information visit_ts: an ISO date string or Unix timestamp to parse + snapshot_id: a snapshot identifier Returns: A dict containing the visit info. + + Raises: + swh.web.common.exc.NotFoundExc: if no visit can be found """ + + if not visit_ts and not visit_id and not snapshot_id: + from swh.web.common import service + + # returns the latest full visit with a valid snapshot + visit = service.lookup_origin_visit_latest( + origin_info["url"], allowed_statuses=["full"], require_snapshot=True + ) + if not visit: + # or the latest partial visit with a valid snapshot otherwise + visit = service.lookup_origin_visit_latest( + origin_info["url"], allowed_statuses=["partial"], require_snapshot=True + ) + if visit: + return visit + else: + raise NotFoundExc( + f"No valid visit for origin with url {origin_info['url']} found!" + ) + visits = get_origin_visits(origin_info) if not visits: raise NotFoundExc( - ("No visit associated to origin with" " url %s!" % origin_info["url"]) + f"No visits associated to origin with url {origin_info['url']}!" ) if snapshot_id: @@ -124,39 +159,35 @@ ) return visits[0] - if not visit_ts: - # returns the latest visit with a valid snapshot when no timestamp is provided - for v in reversed(visits): - if v["snapshot"] is not None: - return v - return visits[-1] - - target_visit_ts = math.floor(parse_timestamp(visit_ts).timestamp()) - - # Find the visit with date closest to the target (in absolute value) - (abs_time_delta, visit_idx) = min( - ( - (math.floor(parse_timestamp(visit["date"]).timestamp()), i) - for (i, visit) in enumerate(visits) - ), - key=lambda ts_and_i: abs(ts_and_i[0] - target_visit_ts), - ) - - if visit_idx is not None: - visit = visits[visit_idx] - # If multiple visits have the same date, select the one with - # the largest id. - while ( - visit_idx < len(visits) - 1 - and visit["date"] == visits[visit_idx + 1]["date"] - ): - visit_idx = visit_idx + 1 - visit = visits[visit_idx] - return visit - else: - raise NotFoundExc( + if visit_ts: + + target_visit_ts = math.floor(parse_timestamp(visit_ts).timestamp()) + + # Find the visit with date closest to the target (in absolute value) + (abs_time_delta, visit_idx) = min( ( - "Visit with timestamp %s for origin with " - "url %s not found!" % (visit_ts, origin_info["url"]) - ) + (math.floor(parse_timestamp(visit["date"]).timestamp()), i) + for (i, visit) in enumerate(visits) + ), + key=lambda ts_and_i: abs(ts_and_i[0] - target_visit_ts), ) + + if visit_idx is not None: + visit = visits[visit_idx] + # If multiple visits have the same date, select the one with + # the largest id. + while ( + visit_idx < len(visits) - 1 + and visit["date"] == visits[visit_idx + 1]["date"] + ): + visit_idx = visit_idx + 1 + visit = visits[visit_idx] + return visit + else: + raise NotFoundExc( + ( + "Visit with timestamp %s for origin with " + "url %s not found!" % (visit_ts, origin_info["url"]) + ) + ) + return visits[-1] diff --git a/swh/web/common/service.py b/swh/web/common/service.py --- a/swh/web/common/service.py +++ b/swh/web/common/service.py @@ -903,21 +903,34 @@ def lookup_origin_visit_latest( - origin_url: str, require_snapshot: bool + origin_url: str, + require_snapshot: bool = False, + type: Optional[str] = None, + allowed_statuses: Optional[Iterable[str]] = None, ) -> Optional[OriginVisitInfo]: """Return the origin's latest visit Args: - origin_url (str): origin to list visits for - require_snapshot (bool): filter out origins without a snapshot + origin_url: origin to list visits for + type: Optional visit type to filter on (e.g git, tar, dsc, svn, + hg, npm, pypi, ...) + allowed_statuses: list of visit statuses considered + to find the latest visit. For instance, + ``allowed_statuses=['full']`` will only consider visits that + have successfully run to completion. + require_snapshot: filter out origins without a snapshot Returns: - The origin_visit as dict if found + The origin visit info as dict if found """ visit_and_status = origin_get_latest_visit_status( - storage, origin_url, require_snapshot=require_snapshot + storage, + origin_url, + type=type, + allowed_statuses=allowed_statuses, + require_snapshot=require_snapshot, ) return ( converters.from_origin_visit( diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py --- a/swh/web/tests/api/views/test_origin.py +++ b/swh/web/tests/api/views/test_origin.py @@ -205,7 +205,7 @@ visit_status = OriginVisitStatus( origin=new_origin.url, visit=origin_visit.visit, - date=now(), + date=visit_date, status="full", snapshot=new_snapshots[i].id, ) diff --git a/swh/web/tests/common/test_origin_visits.py b/swh/web/tests/common/test_origin_visits.py --- a/swh/web/tests/common/test_origin_visits.py +++ b/swh/web/tests/common/test_origin_visits.py @@ -3,14 +3,19 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information +from datetime import timedelta + from hypothesis import given import pytest from swh.model.hashutil import hash_to_hex +from swh.model.model import OriginVisit, OriginVisitStatus +from swh.storage.utils import now from swh.web.common.exc import NotFoundExc from swh.web.common.origin_visits import get_origin_visits, get_origin_visit -from swh.web.tests.strategies import new_snapshots +from swh.web.common.typing import OriginInfo +from swh.web.tests.strategies import new_origin, new_snapshots @given(new_snapshots(3)) @@ -140,57 +145,94 @@ assert visit == visits[-1] -def test_get_origin_visit_latest_valid_snapshot(mocker): - mock_origin_visits = mocker.patch("swh.web.common.origin_visits.get_origin_visits") - origin_info = { - "url": "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json", - } - visits = [ - { - "date": "2020-04-15T12:42:52.936520+00:00", - "origin": origin_info["url"], - "snapshot": "d820451681c74eec63693b6ea4e4b8417c76bb7a", - "status": "partial", - "type": "nixguix", - "visit": 16, - }, - { - "date": "2020-04-17T17:25:13.738789+00:00", - "origin": origin_info["url"], - "snapshot": "d20627c1ae2b5e553e8adcf625f37e37cc5190dd", - "status": "partial", - "type": "nixguix", - "visit": 17, - }, - { - "date": "2020-04-19T19:02:42.906079+00:00", - "origin": origin_info["url"], - "snapshot": None, - "status": "partial", - "type": "nixguix", - "visit": 18, - }, - { - "date": "2020-04-20T12:43:41.120422+00:00", - "origin": origin_info["url"], - "snapshot": None, - "status": "partial", - "type": "nixguix", - "visit": 19, - }, - { - "date": "2020-04-20T12:46:40.255418+00:00", - "origin": origin_info["url"], - "snapshot": None, - "status": "partial", - "type": "nixguix", - "visit": 20, - }, - ] - - mock_origin_visits.return_value = visits - - visit = get_origin_visit(origin_info) - - assert visit["snapshot"] is not None - assert visit["visit"] == 17 +@given(new_origin(), new_snapshots(6)) +def test_get_origin_visit_return_first_valid_full_visit( + archive_data, new_origin, new_snapshots +): + visits = [] + archive_data.origin_add([new_origin]) + # create 6 visits, the first three have full status while the + # last three have partial status and set a null snapshot for + # the last four visits + for i, snp in enumerate(new_snapshots): + visit_date = now() + timedelta(days=i * 10) + visit = archive_data.origin_visit_add( + [OriginVisit(origin=new_origin.url, date=visit_date, type="git",)] + )[0] + archive_data.snapshot_add([new_snapshots[i]]) + visit_status = OriginVisitStatus( + origin=new_origin.url, + visit=visit.visit, + date=visit_date + timedelta(minutes=5), + status="full" if i < 3 else "partial", + snapshot=new_snapshots[i].id if i < 2 else None, + ) + if i < 2: + archive_data.origin_visit_status_add([visit_status]) + visits.append(visit.visit) + + # should return the second visit + expected_visit = archive_data.origin_visit_get_by(new_origin.url, visits[1]) + assert get_origin_visit((OriginInfo(url=new_origin.url))) == expected_visit + + +@given(new_origin(), new_snapshots(6)) +def test_get_origin_visit_non_resolvable_snapshots( + archive_data, new_origin, new_snapshots +): + visits = [] + archive_data.origin_add([new_origin]) + # create 6 full visits, the first three have resolvable snapshots + # while the last three have non resolvable snapshots + for i, snp in enumerate(new_snapshots): + visit_date = now() + timedelta(days=i * 10) + visit = archive_data.origin_visit_add( + [OriginVisit(origin=new_origin.url, date=visit_date, type="git",)] + )[0] + archive_data.snapshot_add([new_snapshots[i]]) + visit_status = OriginVisitStatus( + origin=new_origin.url, + visit=visit.visit, + date=visit_date + timedelta(minutes=5), + status="full", + snapshot=new_snapshots[i].id, + ) + if i < 3: + archive_data.origin_visit_status_add([visit_status]) + visits.append(visit.visit) + + # should return the third visit + expected_visit = archive_data.origin_visit_get_by(new_origin.url, visits[2]) + assert get_origin_visit((OriginInfo(url=new_origin.url))) == expected_visit + + +@given(new_origin(), new_snapshots(6)) +def test_get_origin_visit_return_first_valid_partial_visit( + archive_data, new_origin, new_snapshots +): + visits = [] + + archive_data.origin_add([new_origin]) + # create 6 visits, the first three have full status but null snapshot + # while the last three have partial status with valid snapshot + for i, snp in enumerate(new_snapshots): + visit_date = now() + timedelta(days=i * 10) + visit = archive_data.origin_visit_add( + [OriginVisit(origin=new_origin.url, date=visit_date, type="git",)] + )[0] + archive_data.snapshot_add([new_snapshots[i]]) + visit_status = OriginVisitStatus( + origin=new_origin.url, + visit=visit.visit, + date=visit_date + timedelta(minutes=5), + status="full" if i < 3 else "partial", + snapshot=new_snapshots[i].id if i > 2 else None, + ) + if i > 2: + archive_data.origin_visit_status_add([visit_status]) + + visits.append(visit.visit) + + # should return the last visit + expected_visit = archive_data.origin_visit_get_by(new_origin.url, visits[-1]) + assert get_origin_visit((OriginInfo(url=new_origin.url))) == expected_visit diff --git a/swh/web/tests/conftest.py b/swh/web/tests/conftest.py --- a/swh/web/tests/conftest.py +++ b/swh/web/tests/conftest.py @@ -232,7 +232,8 @@ def origin_visit_get_by(self, origin_url, visit_id): visit = self.storage.origin_visit_get_by(origin_url, visit_id) - return converters.from_origin_visit(visit) + visit_status = self.storage.origin_visit_status_get_latest(origin_url, visit_id) + return converters.from_origin_visit({**visit, **visit_status.to_dict()}) def origin_visit_status_get_latest( self,