Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/algos/origin.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from typing import Optional, Iterable, Tuple | from typing import Any, Dict, Optional, Iterable, Tuple | ||||
from swh.model.model import OriginVisit, OriginVisitStatus | from swh.model.model import OriginVisit, OriginVisitStatus | ||||
def iter_origins(storage, origin_from=1, origin_to=None, batch_size=10000): | def iter_origins(storage, origin_from=1, origin_to=None, batch_size=10000): | ||||
"""Iterates over all origins in the storage. | """Iterates over all origins in the storage. | ||||
Args: | Args: | ||||
storage: the storage object used for queries. | storage: the storage object used for queries. | ||||
Show All 25 Lines | |||||
def origin_get_latest_visit_status( | def origin_get_latest_visit_status( | ||||
storage, | storage, | ||||
origin_url: str, | origin_url: str, | ||||
type: Optional[str] = None, | type: Optional[str] = None, | ||||
allowed_statuses: Optional[Iterable[str]] = None, | allowed_statuses: Optional[Iterable[str]] = None, | ||||
require_snapshot: bool = False, | require_snapshot: bool = False, | ||||
) -> Optional[Tuple[OriginVisit, OriginVisitStatus]]: | ) -> Optional[Tuple[OriginVisit, OriginVisitStatus]]: | ||||
"""Get the latest origin visit and visit status information for a given origin, | """Get the latest origin visit (and status) of an origin. Optionally, a combination of | ||||
optionally looking only for those with one of the given allowed_statuses or for | criteria can be provided: visit type, allowed statuses, or whether a visit has a | ||||
those with a snapshot. | snapshot. | ||||
If nothing matches the criteria, this returns None. | If no visit matching the criteria is found, returns None. Otherwise, returns a tuple | ||||
of origin visit, origin visit status. | |||||
Args: | Args: | ||||
storage: A storage backend | storage: A storage backend | ||||
origin_url: origin URL | origin_url: origin URL | ||||
type: Optional visit type to filter on (e.g git, tar, dsc, svn, | type: Optional visit type to filter on (e.g git, tar, dsc, svn, | ||||
hg, npm, pypi, ...) | hg, npm, pypi, ...) | ||||
allowed_statuses: list of visit statuses considered | allowed_statuses: list of visit statuses considered | ||||
to find the latest visit. For instance, | to find the latest visit. For instance, | ||||
``allowed_statuses=['full']`` will only consider visits that | ``allowed_statuses=['full']`` will only consider visits that | ||||
have successfully run to completion. | have successfully run to completion. | ||||
require_snapshot: If True, only a visit with a snapshot | require_snapshot: If True, only a visit with a snapshot | ||||
will be returned. | will be returned. | ||||
Returns: | Returns: | ||||
a tuple of (visit, visit_status) model objects if the visit *and* the visit | a tuple of (visit, visit_status) model objects if the visit *and* the visit | ||||
status exist, None otherwise. | status exist (and match the search criteria), None otherwise. | ||||
""" | """ | ||||
visit_d = storage.origin_visit_get_latest(origin_url, type=type) | # visits are ordered from the oldest to the most recent. | ||||
if not visit_d: | visits = list(storage.origin_visit_get(origin_url)) | ||||
visits.reverse() | |||||
if not visits: | |||||
return None | return None | ||||
visit = OriginVisit.from_dict(visit_d) | visit_status: Optional[OriginVisitStatus] = None | ||||
visit: Dict[str, Any] | |||||
# Iterate over the visits in reverse order, so the most recent match is found first | |||||
for visit in visits: | |||||
if type is not None and visit["type"] != type: | |||||
continue | |||||
visit_status = storage.origin_visit_status_get_latest( | visit_status = storage.origin_visit_status_get_latest( | ||||
origin_url, | origin_url, | ||||
visit.visit, | visit["visit"], | ||||
allowed_statuses=allowed_statuses, | allowed_statuses=allowed_statuses, | ||||
require_snapshot=require_snapshot, | require_snapshot=require_snapshot, | ||||
) | ) | ||||
if not visit_status: | if visit_status is not None: | ||||
break | |||||
if visit_status is None: | |||||
return None | return None | ||||
return (visit, visit_status) | return (OriginVisit.from_dict(visit), visit_status) |