Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/storage.py
Show First 20 Lines • Show All 869 Lines • ▼ Show 20 Lines | class Storage: | ||||
) -> Optional[OriginVisitStatus]: | ) -> Optional[OriginVisitStatus]: | ||||
row = db.origin_visit_status_get_latest( | row = db.origin_visit_status_get_latest( | ||||
origin_url, visit, allowed_statuses, require_snapshot, cur=cur | origin_url, visit, allowed_statuses, require_snapshot, cur=cur | ||||
) | ) | ||||
if not row: | if not row: | ||||
return None | return None | ||||
return OriginVisitStatus.from_dict(row) | return OriginVisitStatus.from_dict(row) | ||||
# Diff-view note: this helper appears only in the left-hand column; the
# right-hand column is empty on every one of its lines.
# NOTE(review): presumably the change under review deletes this helper
# (left = before, right = after) -- confirm against the changeset.
#
# Purpose: fetch the latest visit status for (visit["origin"], visit["visit"])
# and return a new dict that combines it with `visit`.
#
# NOTE(review): the result of db.origin_visit_status_get_latest is unpacked
# with ** without any emptiness check, whereas the method just above guards
# the same db call with `if not row: return None`. If no status row exists,
# the ** unpacking below would raise TypeError -- verify this cannot happen.
# NOTE(review): `cur` defaults to None and every caller visible in this view
# passes only `db`, so the status lookup is not given the caller's cursor.
def _origin_visit_apply_update( | |||||
self, visit: Dict[str, Any], db, cur=None | |||||
) -> Dict[str, Any]: | |||||
"""Retrieve the latest visit status information for the origin visit. | |||||
Then merge it with the visit and return it. | |||||
""" | |||||
visit_status = db.origin_visit_status_get_latest( | |||||
visit["origin"], visit["visit"], cur=cur | |||||
) | |||||
# Later entries of the dict display win over earlier ones: visit_status
# overrides visit, then "origin" and "date" are forced back to visit's values.
return { | |||||
# default to the values in visit | |||||
**visit, | |||||
# override with the last update | |||||
**visit_status, | |||||
# visit['origin'] is the URL (via a join), while | |||||
# visit_status['origin'] is only an id. | |||||
"origin": visit["origin"], | |||||
# but keep the date of the creation of the origin visit | |||||
"date": visit["date"], | |||||
} | |||||
# Generator over the visits of `origin`: rows come from
# db.origin_visit_get_all (filtered/ordered by last_visit, limit, order) and
# each row is turned into a dict keyed by db.origin_visit_get_cols.
#
# Diff-view note: the two columns are identical except for the loop body:
#   left  -> builds the dict, then yields
#            self._origin_visit_apply_update(visit, db)  (status merged in)
#   right -> yields the plain row dict directly
# NOTE(review): presumably left = before and right = after -- confirm.
@timed | @timed | ||||
@db_transaction_generator(statement_timeout=500) | @db_transaction_generator(statement_timeout=500) | ||||
def origin_visit_get( | def origin_visit_get( | ||||
self, | self, | ||||
origin: str, | origin: str, | ||||
last_visit: Optional[int] = None, | last_visit: Optional[int] = None, | ||||
limit: Optional[int] = None, | limit: Optional[int] = None, | ||||
order: str = "asc", | order: str = "asc", | ||||
db=None, | db=None, | ||||
cur=None, | cur=None, | ||||
) -> Iterable[Dict[str, Any]]: | ) -> Iterable[Dict[str, Any]]: | ||||
# NOTE(review): `assert` statements are removed when Python runs with -O,
# so `order` would then reach the db layer unchecked.
assert order in ["asc", "desc"] | assert order in ["asc", "desc"] | ||||
lines = db.origin_visit_get_all( | lines = db.origin_visit_get_all( | ||||
origin, last_visit=last_visit, limit=limit, order=order, cur=cur | origin, last_visit=last_visit, limit=limit, order=order, cur=cur | ||||
) | ) | ||||
for line in lines: | for line in lines: | ||||
# Left column only: the helper is called with `db` but without `cur`.
visit = dict(zip(db.origin_visit_get_cols, line)) | yield dict(zip(db.origin_visit_get_cols, line)) | ||||
yield self._origin_visit_apply_update(visit, db) | |||||
# Look up a visit of `origin` for `visit_date` via
# db.origin_visit_find_by_date; the annotated return is Optional[Dict].
#
# Diff-view note: the columns differ in the body:
#   left  -> when the db call returns something truthy, passes it through
#            self._origin_visit_apply_update(visit, db); otherwise returns None
#   right -> returns the db result unchanged
# NOTE(review): presumably left = before and right = after -- confirm.
@timed | @timed | ||||
@db_transaction(statement_timeout=500) | @db_transaction(statement_timeout=500) | ||||
def origin_visit_find_by_date( | def origin_visit_find_by_date( | ||||
self, origin: str, visit_date: datetime.datetime, db=None, cur=None | self, origin: str, visit_date: datetime.datetime, db=None, cur=None | ||||
) -> Optional[Dict[str, Any]]: | ) -> Optional[Dict[str, Any]]: | ||||
# Unlike the sibling getters, the db result is used as-is here (no
# dict(zip(...)) conversion in either column).
visit = db.origin_visit_find_by_date(origin, visit_date, cur=cur) | return db.origin_visit_find_by_date(origin, visit_date, cur=cur) | ||||
if visit: | |||||
return self._origin_visit_apply_update(visit, db) | |||||
return None | |||||
# Fetch one visit identified by (origin, visit) via db.origin_visit_get and
# return it as a dict keyed by db.origin_visit_get_cols, or None when the
# row is falsy.
#
# Diff-view note: the columns differ only inside the `if row:` branch:
#   left  -> builds visit_dict, then returns
#            self._origin_visit_apply_update(visit_dict, db)
#   right -> returns the plain row dict
# NOTE(review): presumably left = before and right = after -- confirm.
@timed | @timed | ||||
@db_transaction(statement_timeout=500) | @db_transaction(statement_timeout=500) | ||||
def origin_visit_get_by( | def origin_visit_get_by( | ||||
self, origin: str, visit: int, db=None, cur=None | self, origin: str, visit: int, db=None, cur=None | ||||
) -> Optional[Dict[str, Any]]: | ) -> Optional[Dict[str, Any]]: | ||||
# `cur` is passed positionally here, while other db calls in this view
# use the keyword form `cur=cur`.
row = db.origin_visit_get(origin, visit, cur) | row = db.origin_visit_get(origin, visit, cur) | ||||
if row: | if row: | ||||
visit_dict = dict(zip(db.origin_visit_get_cols, row)) | return dict(zip(db.origin_visit_get_cols, row)) | ||||
return self._origin_visit_apply_update(visit_dict, db) | |||||
return None | return None | ||||
# Return the row produced by db.origin_visit_get_latest for `origin`
# (with the type / allowed_statuses / require_snapshot arguments forwarded
# unchanged) as a dict keyed by db.origin_visit_get_cols, or None when the
# row is falsy.
#
# Diff-view note: the columns differ only inside the `if row:` branch:
#   left  -> builds the dict, then returns
#            self._origin_visit_apply_update(visit, db)
#   right -> returns the plain row dict
# NOTE(review): presumably left = before and right = after -- confirm.
# NOTE(review): the parameter name `type` shadows the builtin inside this
# method; it is part of the public signature, so it is left as-is.
@timed | @timed | ||||
@db_transaction(statement_timeout=4000) | @db_transaction(statement_timeout=4000) | ||||
def origin_visit_get_latest( | def origin_visit_get_latest( | ||||
self, | self, | ||||
origin: str, | origin: str, | ||||
type: Optional[str] = None, | type: Optional[str] = None, | ||||
allowed_statuses: Optional[List[str]] = None, | allowed_statuses: Optional[List[str]] = None, | ||||
require_snapshot: bool = False, | require_snapshot: bool = False, | ||||
db=None, | db=None, | ||||
cur=None, | cur=None, | ||||
) -> Optional[Dict[str, Any]]: | ) -> Optional[Dict[str, Any]]: | ||||
row = db.origin_visit_get_latest( | row = db.origin_visit_get_latest( | ||||
origin, | origin, | ||||
type=type, | type=type, | ||||
allowed_statuses=allowed_statuses, | allowed_statuses=allowed_statuses, | ||||
require_snapshot=require_snapshot, | require_snapshot=require_snapshot, | ||||
cur=cur, | cur=cur, | ||||
) | ) | ||||
if row: | if row: | ||||
visit = dict(zip(db.origin_visit_get_cols, row)) | return dict(zip(db.origin_visit_get_cols, row)) | ||||
return self._origin_visit_apply_update(visit, db) | |||||
return None | return None | ||||
# Return the row produced by db.origin_visit_get_random(type, cur) as a dict
# keyed by db.origin_visit_get_cols, or None when the row is falsy.
#
# Diff-view note: the columns differ only inside the `if row:` branch:
#   left  -> builds the dict, then returns
#            self._origin_visit_apply_update(visit, db)
#   right -> returns the plain row dict
# NOTE(review): presumably left = before and right = after -- confirm.
# NOTE(review): the parameter name `type` shadows the builtin inside this
# method; it is part of the public signature, so it is left as-is.
@timed | @timed | ||||
@db_transaction() | @db_transaction() | ||||
def origin_visit_get_random( | def origin_visit_get_random( | ||||
self, type: str, db=None, cur=None | self, type: str, db=None, cur=None | ||||
) -> Optional[Dict[str, Any]]: | ) -> Optional[Dict[str, Any]]: | ||||
# `cur` is passed positionally here, while other db calls in this view
# use the keyword form `cur=cur`.
row = db.origin_visit_get_random(type, cur) | row = db.origin_visit_get_random(type, cur) | ||||
if row: | if row: | ||||
visit = dict(zip(db.origin_visit_get_cols, row)) | return dict(zip(db.origin_visit_get_cols, row)) | ||||
return self._origin_visit_apply_update(visit, db) | |||||
return None | return None | ||||
@timed | @timed | ||||
@db_transaction(statement_timeout=2000) | @db_transaction(statement_timeout=2000) | ||||
def object_find_by_sha1_git(self, ids, db=None, cur=None): | def object_find_by_sha1_git(self, ids, db=None, cur=None): | ||||
ret = {id: [] for id in ids} | ret = {id: [] for id in ids} | ||||
for retval in db.object_find_by_sha1_git(ids, cur=cur): | for retval in db.object_find_by_sha1_git(ids, cur=cur): | ||||
▲ Show 20 Lines • Show All 363 Lines • Show Last 20 Lines |