Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/algos/origin.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from typing import Any, Dict, Optional, Iterable, Iterator, Tuple | from typing import Optional, Iterable, Iterator, Tuple | ||||
from swh.model.model import Origin, OriginVisit, OriginVisitStatus | from swh.model.model import Origin, OriginVisit, OriginVisitStatus | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
def iter_origins( | def iter_origins( | ||||
storage: StorageInterface, | storage: StorageInterface, | ||||
origin_from: int = 1, | origin_from: int = 1, | ||||
Show All 27 Lines | while True: | ||||
for origin in origins: | for origin in origins: | ||||
del origin["id"] | del origin["id"] | ||||
yield Origin.from_dict(origin) | yield Origin.from_dict(origin) | ||||
if origin_to and start > origin_to: | if origin_to and start > origin_to: | ||||
break | break | ||||
def origin_get_latest_visit_status( | def origin_get_latest_visit_status( | ||||
storage, | storage: StorageInterface, | ||||
origin_url: str, | origin_url: str, | ||||
type: Optional[str] = None, | type: Optional[str] = None, | ||||
allowed_statuses: Optional[Iterable[str]] = None, | allowed_statuses: Optional[Iterable[str]] = None, | ||||
require_snapshot: bool = False, | require_snapshot: bool = False, | ||||
) -> Optional[Tuple[OriginVisit, OriginVisitStatus]]: | ) -> Optional[Tuple[OriginVisit, OriginVisitStatus]]: | ||||
"""Get the latest origin visit (and status) of an origin. Optionally, a combination of | """Get the latest origin visit (and status) of an origin. Optionally, a combination of | ||||
criteria can be provided, origin type, allowed statuses or if a visit has a | criteria can be provided, origin type, allowed statuses or if a visit has a | ||||
snapshot. | snapshot. | ||||
Show All 13 Lines | Args: | ||||
require_snapshot: If True, only a visit with a snapshot | require_snapshot: If True, only a visit with a snapshot | ||||
will be returned. | will be returned. | ||||
Returns: | Returns: | ||||
a tuple of (visit, visit_status) model object if the visit *and* the visit | a tuple of (visit, visit_status) model object if the visit *and* the visit | ||||
status exist (and match the search criteria), None otherwise. | status exist (and match the search criteria), None otherwise. | ||||
""" | """ | ||||
last_visit = None | visit = storage.origin_visit_get_latest( | ||||
while True: | origin_url, | ||||
visits = list( | type=type, | ||||
storage.origin_visit_get( | allowed_statuses=allowed_statuses, | ||||
origin_url, last_visit=last_visit, order="desc", limit=10, | require_snapshot=require_snapshot, | ||||
) | |||||
) | ) | ||||
if not visits: | result: Optional[Tuple[OriginVisit, OriginVisitStatus]] = None | ||||
return None | if visit: | ||||
last_visit = visits[-1]["visit"] | |||||
visit_status: Optional[OriginVisitStatus] = None | |||||
visit: Dict[str, Any] | |||||
for visit in visits: | |||||
if type is not None and visit["type"] != type: | |||||
continue | |||||
visit_status = storage.origin_visit_status_get_latest( | visit_status = storage.origin_visit_status_get_latest( | ||||
origin_url, | origin_url, | ||||
visit["visit"], | visit.visit, | ||||
allowed_statuses=allowed_statuses, | allowed_statuses=allowed_statuses, | ||||
require_snapshot=require_snapshot, | require_snapshot=require_snapshot, | ||||
) | ) | ||||
if visit_status is not None: | if visit_status: | ||||
# storage api gives us too many data which no longer map to an | result = visit, visit_status | ||||
# origin-visit, so we drop those | return result | ||||
for key in ["metadata", "status", "snapshot"]: | |||||
visit.pop(key, None) | |||||
return (OriginVisit.from_dict(visit), visit_status) |