Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/storage.py
Show First 20 Lines • Show All 1,057 Lines • ▼ Show 20 Lines | def snapshot_get_latest(self, origin, allowed_statuses=None, db=None, | ||||
dict: a dict with three keys: | dict: a dict with three keys: | ||||
* **id**: identifier of the snapshot | * **id**: identifier of the snapshot | ||||
* **branches**: a dict of branches contained in the snapshot | * **branches**: a dict of branches contained in the snapshot | ||||
whose keys are the branches' names. | whose keys are the branches' names. | ||||
* **next_branch**: the name of the first branch not returned | * **next_branch**: the name of the first branch not returned | ||||
or :const:`None` if the snapshot has less than 1000 | or :const:`None` if the snapshot has less than 1000 | ||||
branches. | branches. | ||||
""" | """ | ||||
if isinstance(origin, str): | if isinstance(origin, int): | ||||
origin = self.origin_get({'url': origin})['id'] | origin = self.origin_get({'id': origin}, db=db, cur=cur)['url'] | ||||
origin_visit = db.origin_visit_get_latest_snapshot( | origin_visit = self.origin_visit_get_latest( | ||||
origin, allowed_statuses=allowed_statuses, cur=cur) | origin, allowed_statuses=allowed_statuses, require_snapshot=True, | ||||
if origin_visit: | db=db, cur=cur) | ||||
origin_visit = dict(zip(db.origin_visit_get_cols, origin_visit)) | if origin_visit and origin_visit['snapshot']: | ||||
return self.snapshot_get(origin_visit['snapshot'], db=db, cur=cur) | return self.snapshot_get(origin_visit['snapshot'], db=db, cur=cur) | ||||
@db_transaction(statement_timeout=2000) | @db_transaction(statement_timeout=2000) | ||||
def snapshot_count_branches(self, snapshot_id, db=None, cur=None): | def snapshot_count_branches(self, snapshot_id, db=None, cur=None): | ||||
"""Count the number of branches in the snapshot with the given id | """Count the number of branches in the snapshot with the given id | ||||
Args: | Args: | ||||
snapshot_id (bytes): identifier of the snapshot | snapshot_id (bytes): identifier of the snapshot | ||||
▲ Show 20 Lines • Show All 242 Lines • ▼ Show 20 Lines | def origin_visit_get_by(self, origin, visit, db=None, cur=None): | ||||
Args: | Args: | ||||
origin: The occurrence's origin (identifier). | origin: The occurrence's origin (identifier). | ||||
Returns: | Returns: | ||||
The information on that particular (origin, visit) or None if | The information on that particular (origin, visit) or None if | ||||
it does not exist | it does not exist | ||||
""" | """ | ||||
if isinstance(origin, str): | |||||
origin = self.origin_get({'url': origin}, db=db, cur=cur)['id'] | |||||
ori_visit = db.origin_visit_get(origin, visit, cur) | ori_visit = db.origin_visit_get(origin, visit, cur) | ||||
if not ori_visit: | if not ori_visit: | ||||
return None | return None | ||||
return dict(zip(db.origin_visit_get_cols, ori_visit)) | return dict(zip(db.origin_visit_get_cols, ori_visit)) | ||||
@db_transaction(statement_timeout=4000)
def origin_visit_get_latest(
        self, origin, allowed_statuses=None, require_snapshot=False,
        db=None, cur=None):
    """Get the latest origin visit for the given origin, optionally
    looking only for those with one of the given allowed_statuses
    or for those with a known snapshot.

    Args:
        origin (str): the origin's URL
        allowed_statuses (list of str): list of visit statuses considered
            to find the latest visit. For instance,
            ``allowed_statuses=['full']`` will only consider visits that
            have successfully run to completion.
        require_snapshot (bool): If True, only a visit with a snapshot
            will be returned.

    Returns:
        dict: a dict with the following keys, or :const:`None` if the
        origin lookup yields nothing or no visit matches the criteria:

            origin: the URL of the origin
            visit: origin visit id
            type: type of loader used for the visit
            date: timestamp of such visit
            status: Visit's new status
            metadata: Data associated to the visit
            snapshot (Optional[sha1_git]): identifier of the snapshot
                associated to the visit

    """
    # Resolve the URL to the internal origin id expected by the db layer.
    origin_info = self.origin_get({'url': origin}, db=db, cur=cur)
    # NOTE(review): origin_get presumably returns None for an unknown URL
    # (confirm against its implementation); bail out instead of raising
    # TypeError when subscripting the missing result.
    if not origin_info:
        return None

    origin_visit = db.origin_visit_get_latest(
        origin_info['id'], allowed_statuses=allowed_statuses,
        require_snapshot=require_snapshot, cur=cur)
    if origin_visit:
        # The db layer returns a row; pair it with its column names.
        return dict(zip(db.origin_visit_get_cols, origin_visit))
    return None
@db_transaction(statement_timeout=2000) | @db_transaction(statement_timeout=2000) | ||||
def object_find_by_sha1_git(self, ids, db=None, cur=None): | def object_find_by_sha1_git(self, ids, db=None, cur=None): | ||||
"""Return the objects found with the given ids. | """Return the objects found with the given ids. | ||||
Args: | Args: | ||||
ids: a generator of sha1_gits | ids: a generator of sha1_gits | ||||
Returns: | Returns: | ||||
▲ Show 20 Lines • Show All 480 Lines • Show Last 20 Lines |