Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show First 20 Lines • Show All 575 Lines • ▼ Show 20 Lines | def snapshot_get_by_origin_visit(self, origin, visit): | ||||
return | return | ||||
if origin_url not in self._origins or visit > len( | if origin_url not in self._origins or visit > len( | ||||
self._origin_visits[origin_url] | self._origin_visits[origin_url] | ||||
): | ): | ||||
return None | return None | ||||
visit = self._origin_visit_get_updated(origin_url, visit) | visit = self._origin_visit_get_updated(origin_url, visit) | ||||
snapshot_id = visit.snapshot | snapshot_id = visit["snapshot"] | ||||
if snapshot_id: | if snapshot_id: | ||||
return self.snapshot_get(snapshot_id) | return self.snapshot_get(snapshot_id) | ||||
else: | else: | ||||
return None | return None | ||||
def snapshot_count_branches(self, snapshot_id): | def snapshot_count_branches(self, snapshot_id): | ||||
snapshot = self._snapshots[snapshot_id] | snapshot = self._snapshots[snapshot_id] | ||||
return collections.Counter( | return collections.Counter( | ||||
▲ Show 20 Lines • Show All 141 Lines • ▼ Show 20 Lines | ): | ||||
if with_visit: | if with_visit: | ||||
filtered_origins = [] | filtered_origins = [] | ||||
for orig in origins: | for orig in origins: | ||||
visits = ( | visits = ( | ||||
self._origin_visit_get_updated(ov.origin, ov.visit) | self._origin_visit_get_updated(ov.origin, ov.visit) | ||||
for ov in self._origin_visits[orig["url"]] | for ov in self._origin_visits[orig["url"]] | ||||
) | ) | ||||
for ov in visits: | for ov in visits: | ||||
if ov.snapshot and ov.snapshot in self._snapshots: | snapshot = ov["snapshot"] | ||||
if snapshot and snapshot in self._snapshots: | |||||
filtered_origins.append(orig) | filtered_origins.append(orig) | ||||
break | break | ||||
else: | else: | ||||
filtered_origins = origins | filtered_origins = origins | ||||
return filtered_origins[offset : offset + limit] | return filtered_origins[offset : offset + limit] | ||||
def origin_count(self, url_pattern, regexp=False, with_visit=False): | def origin_count(self, url_pattern, regexp=False, with_visit=False): | ||||
▲ Show 20 Lines • Show All 89 Lines • ▼ Show 20 Lines | ) -> None: | ||||
for visit_status in visit_statuses: | for visit_status in visit_statuses: | ||||
origin_url = self.origin_get({"url": visit_status.origin}) | origin_url = self.origin_get({"url": visit_status.origin}) | ||||
if not origin_url: | if not origin_url: | ||||
raise StorageArgumentException(f"Unknown origin {visit_status.origin}") | raise StorageArgumentException(f"Unknown origin {visit_status.origin}") | ||||
for visit_status in visit_statuses: | for visit_status in visit_statuses: | ||||
self._origin_visit_status_add_one(visit_status) | self._origin_visit_status_add_one(visit_status) | ||||
def _origin_visit_get_updated(self, origin: str, visit_id: int) -> OriginVisit: | def _origin_visit_get_updated(self, origin: str, visit_id: int) -> Dict[str, Any]: | ||||
"""Merge origin visit and latest origin visit status | """Merge origin visit and latest origin visit status | ||||
""" | """ | ||||
assert visit_id >= 1 | assert visit_id >= 1 | ||||
visit = self._origin_visits[origin][visit_id - 1] | visit = self._origin_visits[origin][visit_id - 1] | ||||
assert visit is not None | assert visit is not None | ||||
visit_key = (origin, visit_id) | visit_key = (origin, visit_id) | ||||
visit_update = max(self._origin_visit_statuses[visit_key], key=lambda v: v.date) | visit_update = max(self._origin_visit_statuses[visit_key], key=lambda v: v.date) | ||||
return OriginVisit.from_dict( | return { | ||||
{ | |||||
# default to the values in visit | # default to the values in visit | ||||
**visit.to_dict(), | **visit.to_dict(), | ||||
# override with the last update | # override with the last update | ||||
**visit_update.to_dict(), | **visit_update.to_dict(), | ||||
# but keep the date of the creation of the origin visit | # but keep the date of the creation of the origin visit | ||||
"date": visit.date, | "date": visit.date, | ||||
} | } | ||||
) | |||||
def origin_visit_get( | def origin_visit_get( | ||||
self, | self, | ||||
origin: str, | origin: str, | ||||
last_visit: Optional[int] = None, | last_visit: Optional[int] = None, | ||||
limit: Optional[int] = None, | limit: Optional[int] = None, | ||||
order: str = "asc", | order: str = "asc", | ||||
) -> Iterable[Dict[str, Any]]: | ) -> Iterable[Dict[str, Any]]: | ||||
Show All 12 Lines | ) -> Iterable[Dict[str, Any]]: | ||||
visits = visits[:limit] | visits = visits[:limit] | ||||
for visit in visits: | for visit in visits: | ||||
if not visit: | if not visit: | ||||
continue | continue | ||||
visit_id = visit.visit | visit_id = visit.visit | ||||
visit_update = self._origin_visit_get_updated(origin_url, visit_id) | visit_update = self._origin_visit_get_updated(origin_url, visit_id) | ||||
assert visit_update is not None | assert visit_update is not None | ||||
yield visit_update.to_dict() | yield visit_update | ||||
def origin_visit_find_by_date( | def origin_visit_find_by_date( | ||||
self, origin: str, visit_date: datetime.datetime | self, origin: str, visit_date: datetime.datetime | ||||
) -> Optional[Dict[str, Any]]: | ) -> Optional[Dict[str, Any]]: | ||||
origin_url = self._get_origin_url(origin) | origin_url = self._get_origin_url(origin) | ||||
if origin_url in self._origin_visits: | if origin_url in self._origin_visits: | ||||
visits = self._origin_visits[origin_url] | visits = self._origin_visits[origin_url] | ||||
visit = min(visits, key=lambda v: (abs(v.date - visit_date), -v.visit)) | visit = min(visits, key=lambda v: (abs(v.date - visit_date), -v.visit)) | ||||
visit_update = self._origin_visit_get_updated(origin, visit.visit) | visit_update = self._origin_visit_get_updated(origin, visit.visit) | ||||
assert visit_update is not None | assert visit_update is not None | ||||
return visit_update.to_dict() | return visit_update | ||||
return None | return None | ||||
def origin_visit_get_by(self, origin: str, visit: int) -> Optional[Dict[str, Any]]: | def origin_visit_get_by(self, origin: str, visit: int) -> Optional[Dict[str, Any]]: | ||||
origin_url = self._get_origin_url(origin) | origin_url = self._get_origin_url(origin) | ||||
if origin_url in self._origin_visits and visit <= len( | if origin_url in self._origin_visits and visit <= len( | ||||
self._origin_visits[origin_url] | self._origin_visits[origin_url] | ||||
): | ): | ||||
visit_update = self._origin_visit_get_updated(origin_url, visit) | visit_update = self._origin_visit_get_updated(origin_url, visit) | ||||
assert visit_update is not None | assert visit_update is not None | ||||
return visit_update.to_dict() | return visit_update | ||||
return None | return None | ||||
def origin_visit_get_latest( | def origin_visit_get_latest( | ||||
self, | self, | ||||
origin: str, | origin: str, | ||||
type: Optional[str] = None, | type: Optional[str] = None, | ||||
allowed_statuses: Optional[List[str]] = None, | allowed_statuses: Optional[List[str]] = None, | ||||
require_snapshot: bool = False, | require_snapshot: bool = False, | ||||
) -> Optional[Dict[str, Any]]: | ) -> Optional[Dict[str, Any]]: | ||||
ori = self._origins.get(origin) | ori = self._origins.get(origin) | ||||
if not ori: | if not ori: | ||||
return None | return None | ||||
visits = self._origin_visits[ori.url] | visits = self._origin_visits[ori.url] | ||||
visits = [ | visits = [ | ||||
self._origin_visit_get_updated(visit.origin, visit.visit) | self._origin_visit_get_updated(visit.origin, visit.visit) | ||||
for visit in visits | for visit in visits | ||||
if visit is not None | if visit is not None | ||||
] | ] | ||||
if type is not None: | if type is not None: | ||||
visits = [visit for visit in visits if visit.type == type] | visits = [visit for visit in visits if visit["type"] == type] | ||||
if allowed_statuses is not None: | if allowed_statuses is not None: | ||||
visits = [visit for visit in visits if visit.status in allowed_statuses] | visits = [visit for visit in visits if visit["status"] in allowed_statuses] | ||||
if require_snapshot: | if require_snapshot: | ||||
visits = [visit for visit in visits if visit.snapshot] | visits = [visit for visit in visits if visit["snapshot"]] | ||||
visit = max(visits, key=lambda v: (v.date, v.visit), default=None) | visit = max(visits, key=lambda v: (v["date"], v["visit"]), default=None) | ||||
if visit is None: | if visit is None: | ||||
return None | return None | ||||
return visit.to_dict() | return visit | ||||
def origin_visit_status_get_latest( | def origin_visit_status_get_latest( | ||||
self, | self, | ||||
origin_url: str, | origin_url: str, | ||||
visit: int, | visit: int, | ||||
allowed_statuses: Optional[List[str]] = None, | allowed_statuses: Optional[List[str]] = None, | ||||
require_snapshot: bool = False, | require_snapshot: bool = False, | ||||
) -> Optional[OriginVisitStatus]: | ) -> Optional[OriginVisitStatus]: | ||||
Show All 25 Lines | def origin_visit_get_random(self, type: str) -> Optional[Dict[str, Any]]: | ||||
url = self._select_random_origin_visit_by_type(type) | url = self._select_random_origin_visit_by_type(type) | ||||
random_origin_visits = copy.deepcopy(self._origin_visits[url]) | random_origin_visits = copy.deepcopy(self._origin_visits[url]) | ||||
random_origin_visits.reverse() | random_origin_visits.reverse() | ||||
back_in_the_day = now() - timedelta(weeks=12) # 3 months back | back_in_the_day = now() - timedelta(weeks=12) # 3 months back | ||||
# This should be enough for tests | # This should be enough for tests | ||||
for visit in random_origin_visits: | for visit in random_origin_visits: | ||||
updated_visit = self._origin_visit_get_updated(url, visit.visit) | updated_visit = self._origin_visit_get_updated(url, visit.visit) | ||||
assert updated_visit is not None | assert updated_visit is not None | ||||
if updated_visit.date > back_in_the_day and updated_visit.status == "full": | if ( | ||||
return updated_visit.to_dict() | updated_visit["date"] > back_in_the_day | ||||
and updated_visit["status"] == "full" | |||||
): | |||||
return updated_visit | |||||
else: | else: | ||||
return None | return None | ||||
def stat_counters(self): | def stat_counters(self): | ||||
keys = ( | keys = ( | ||||
"content", | "content", | ||||
"directory", | "directory", | ||||
"origin", | "origin", | ||||
▲ Show 20 Lines • Show All 291 Lines • Show Last 20 Lines |