Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show First 20 Lines • Show All 786 Lines • ▼ Show 20 Lines | def snapshot_get_latest(self, origin, allowed_statuses=None): | ||||
given origin, optionally only from visits that have one of the given | given origin, optionally only from visits that have one of the given | ||||
allowed_statuses | allowed_statuses | ||||
The branches of the snapshot are iterated in the lexicographical | The branches of the snapshot are iterated in the lexicographical | ||||
order of their names. | order of their names. | ||||
.. warning:: At most 1000 branches contained in the snapshot will be | .. warning:: At most 1000 branches contained in the snapshot will be | ||||
returned for performance reasons. In order to browse the whole | returned for performance reasons. In order to browse the whole | ||||
set of branches, the method :meth:`snapshot_get_branches` | set of branches, the methods :meth:`origin_visit_get_latest` | ||||
should be used instead. | and :meth:`snapshot_get_branches` should be used instead. | ||||
Args: | Args: | ||||
origin (Union[str,int]): the origin's URL or identifier | origin (Union[str,int]): the origin's URL or identifier | ||||
allowed_statuses (list of str): list of visit statuses considered | allowed_statuses (list of str): list of visit statuses considered | ||||
to find the latest snapshot for the visit. For instance, | to find the latest snapshot for the origin. For instance, | ||||
``allowed_statuses=['full']`` will only consider visits that | ``allowed_statuses=['full']`` will only consider visits that | ||||
have successfully run to completion. | have successfully run to completion. | ||||
Returns: | Returns: | ||||
dict: a dict with three keys: | dict: a dict with three keys: | ||||
* **id**: identifier of the snapshot | * **id**: identifier of the snapshot | ||||
* **branches**: a dict of branches contained in the snapshot | * **branches**: a dict of branches contained in the snapshot | ||||
whose keys are the branches' names. | whose keys are the branches' names. | ||||
* **next_branch**: the name of the first branch not returned | * **next_branch**: the name of the first branch not returned | ||||
or :const:`None` if the snapshot has fewer than 1000 | or :const:`None` if the snapshot has fewer than 1000 | ||||
branches. | branches. | ||||
""" | """ | ||||
if isinstance(origin, str): | if isinstance(origin, int): | ||||
origin = self.origin_get({'url': origin})['id'] | origin = self.origin_get({'id': origin})['url'] | ||||
visits = self._origin_visits[origin-1] | |||||
if allowed_statuses is not None: | |||||
visits = [visit for visit in visits | |||||
if visit['status'] in allowed_statuses] | |||||
snapshot = None | |||||
for visit in sorted(visits, key=lambda v: (v['date'], v['visit']), | |||||
reverse=True): | |||||
snapshot_id = visit['snapshot'] | |||||
snapshot = self.snapshot_get(snapshot_id) | |||||
if snapshot: | |||||
break | |||||
return snapshot | visit = self.origin_visit_get_latest( | ||||
origin, allowed_statuses=allowed_statuses, require_snapshot=True) | |||||
if visit and visit['snapshot']: | |||||
return self.snapshot_get(visit['snapshot']) | |||||
def snapshot_count_branches(self, snapshot_id, db=None, cur=None): | def snapshot_count_branches(self, snapshot_id, db=None, cur=None): | ||||
"""Count the number of branches in the snapshot with the given id | """Count the number of branches in the snapshot with the given id | ||||
Args: | Args: | ||||
snapshot_id (bytes): identifier of the snapshot | snapshot_id (bytes): identifier of the snapshot | ||||
Returns: | Returns: | ||||
▲ Show 20 Lines • Show All 482 Lines • ▼ Show 20 Lines | def origin_visit_get_by(self, origin, visit): | ||||
Args: | Args: | ||||
origin (int): the origin's identifier | origin (int): the origin's identifier | ||||
Returns: | Returns: | ||||
The information on that particular (origin, visit) or None if | The information on that particular (origin, visit) or None if | ||||
it does not exist | it does not exist | ||||
""" | """ | ||||
if isinstance(origin, str): | |||||
origin = self.origin_get({'url': origin})['id'] | |||||
origin_visit = None | origin_visit = None | ||||
if origin <= len(self._origin_visits) and \ | if origin <= len(self._origin_visits) and \ | ||||
visit <= len(self._origin_visits[origin-1]): | visit <= len(self._origin_visits[origin-1]): | ||||
origin_visit = self._origin_visits[origin-1][visit-1] | origin_visit = self._origin_visits[origin-1][visit-1] | ||||
return copy.deepcopy(origin_visit) | return copy.deepcopy(origin_visit) | ||||
def origin_visit_get_latest( | |||||
self, origin, allowed_statuses=None, require_snapshot=False): | |||||
"""Get the latest origin visit for the given origin, optionally | |||||
looking only for those with one of the given allowed_statuses | |||||
or for those with a known snapshot. | |||||
Args: | |||||
origin (str): the origin's URL | |||||
allowed_statuses (list of str): list of visit statuses considered | |||||
to find the latest visit. For instance, | |||||
``allowed_statuses=['full']`` will only consider visits that | |||||
have successfully run to completion. | |||||
require_snapshot (bool): If True, only a visit with a snapshot | |||||
will be returned. | |||||
Returns: | |||||
dict: a dict with the following keys: | |||||
origin: the URL of the origin | |||||
visit: origin visit id | |||||
type: type of loader used for the visit | |||||
date: timestamp of such visit | |||||
status: Visit's status | |||||
metadata: Data associated to the visit | |||||
snapshot (Optional[sha1_git]): identifier of the snapshot | |||||
associated to the visit | |||||
""" | |||||
origin = self.origin_get({'url': origin})['id'] | |||||
visits = self._origin_visits[origin-1] | |||||
if allowed_statuses is not None: | |||||
visits = [visit for visit in visits | |||||
if visit['status'] in allowed_statuses] | |||||
if require_snapshot: | |||||
visits = [visit for visit in visits | |||||
if visit['snapshot'] | |||||
and visit['snapshot'] in self._snapshots] | |||||
return max(visits, key=lambda v: (v['date'], v['visit']), default=None) | |||||
def person_get(self, person): | def person_get(self, person): | ||||
"""Return the persons identified by their ids. | """Return the persons identified by their ids. | ||||
Args: | Args: | ||||
person: array of ids. | person: array of ids. | ||||
Returns: | Returns: | ||||
The array of persons corresponding to the ids. | The array of persons corresponding to the ids. | ||||
▲ Show 20 Lines • Show All 232 Lines • Show Last 20 Lines |