Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show First 20 Lines • Show All 875 Lines • ▼ Show 20 Lines | def _origin_visit_get_updated(self, origin: str, visit_id: int) -> Dict[str, Any]: | ||||
# default to the values in visit | # default to the values in visit | ||||
**visit.to_dict(), | **visit.to_dict(), | ||||
# override with the last update | # override with the last update | ||||
**visit_update.to_dict(), | **visit_update.to_dict(), | ||||
# but keep the date of the creation of the origin visit | # but keep the date of the creation of the origin visit | ||||
"date": visit.date, | "date": visit.date, | ||||
} | } | ||||
def _origin_visit_status_get_updated( | |||||
self, origin: str, visit_id: int | |||||
) -> Tuple[OriginVisit, OriginVisitStatus]: | |||||
"""Return a tuple of OriginVisit, latest associated OriginVisitStatus. | |||||
""" | |||||
assert visit_id >= 1 | |||||
visit = self._origin_visits[origin][visit_id - 1] | |||||
assert visit is not None | |||||
visit_key = (origin, visit_id) | |||||
visit_update = max(self._origin_visit_statuses[visit_key], key=lambda v: v.date) | |||||
return visit, visit_update | |||||
def origin_visit_get( | def origin_visit_get( | ||||
self, | self, | ||||
origin: str, | origin: str, | ||||
last_visit: Optional[int] = None, | last_visit: Optional[int] = None, | ||||
limit: Optional[int] = None, | limit: Optional[int] = None, | ||||
order: str = "asc", | order: str = "asc", | ||||
) -> Iterable[Dict[str, Any]]: | ) -> Iterable[Dict[str, Any]]: | ||||
order = order.lower() | order = order.lower() | ||||
▲ Show 20 Lines • Show All 96 Lines • ▼ Show 20 Lines | class InMemoryStorage: | ||||
def _select_random_origin_visit_by_type(self, type: str) -> str: | def _select_random_origin_visit_by_type(self, type: str) -> str: | ||||
while True: | while True: | ||||
url = random.choice(list(self._origin_visits.keys())) | url = random.choice(list(self._origin_visits.keys())) | ||||
random_origin_visits = self._origin_visits[url] | random_origin_visits = self._origin_visits[url] | ||||
if random_origin_visits[0].type == type: | if random_origin_visits[0].type == type: | ||||
return url | return url | ||||
def origin_visit_get_random(
    self, type: str
) -> Optional[Tuple[OriginVisit, OriginVisitStatus]]:
    """Return a recent "full" visit of a randomly selected origin.

    An origin url is selected with
    ``self._select_random_origin_visit_by_type(type)``; its visits are then
    scanned from the last stored one to the first.

    Args:
        type: visit type forwarded to the random origin selection.

    Returns:
        The first ``(visit, latest status)`` pair found whose visit date is
        more recent than 12 weeks ago and whose latest status is
        ``"full"``; ``None`` if the selected origin has no such visit.

    """
    url = self._select_random_origin_visit_by_type(type)
    back_in_the_day = now() - timedelta(weeks=12)  # 3 months back
    # This should be enough for tests
    # Iterate in reverse without copying: only ``visit.visit`` is read here,
    # and the returned objects come from the status lookup below.
    for visit in reversed(self._origin_visits[url]):
        origin_visit, latest_visit_status = self._origin_visit_status_get_updated(
            url, visit.visit
        )
        if (
            origin_visit.date > back_in_the_day
            and latest_visit_status.status == "full"
        ):
            return origin_visit, latest_visit_status
    return None
def stat_counters(self): | def stat_counters(self): | ||||
keys = ( | keys = ( | ||||
"content", | "content", | ||||
"directory", | "directory", | ||||
"origin", | "origin", | ||||
▲ Show 20 Lines • Show All 209 Lines • Show Last 20 Lines |