Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show First 20 Lines • Show All 856 Lines • ▼ Show 20 Lines | ) -> None: | ||||
for visit_status in visit_statuses: | for visit_status in visit_statuses: | ||||
origin_url = self.origin_get({"url": visit_status.origin}) | origin_url = self.origin_get({"url": visit_status.origin}) | ||||
if not origin_url: | if not origin_url: | ||||
raise StorageArgumentException(f"Unknown origin {visit_status.origin}") | raise StorageArgumentException(f"Unknown origin {visit_status.origin}") | ||||
for visit_status in visit_statuses: | for visit_status in visit_statuses: | ||||
self._origin_visit_status_add_one(visit_status) | self._origin_visit_status_add_one(visit_status) | ||||
def _origin_visit_get_updated(self, origin: str, visit_id: int) -> Dict[str, Any]: | def _origin_visit_status_get_latest( | ||||
"""Merge origin visit and latest origin visit status | self, origin: str, visit_id: int | ||||
) -> Tuple[OriginVisit, OriginVisitStatus]: | |||||
"""Return a tuple of OriginVisit, latest associated OriginVisitStatus. | |||||
""" | """ | ||||
assert visit_id >= 1 | assert visit_id >= 1 | ||||
visit = self._origin_visits[origin][visit_id - 1] | visit = self._origin_visits[origin][visit_id - 1] | ||||
assert visit is not None | assert visit is not None | ||||
visit_key = (origin, visit_id) | visit_key = (origin, visit_id) | ||||
visit_update = max(self._origin_visit_statuses[visit_key], key=lambda v: v.date) | visit_update = max(self._origin_visit_statuses[visit_key], key=lambda v: v.date) | ||||
return visit, visit_update | |||||
def _origin_visit_get_updated(self, origin: str, visit_id: int) -> Dict[str, Any]: | |||||
"""Merge origin visit and latest origin visit status | |||||
""" | |||||
visit, visit_update = self._origin_visit_status_get_latest(origin, visit_id) | |||||
assert visit is not None and visit_update is not None | |||||
return { | return { | ||||
# default to the values in visit | # default to the values in visit | ||||
**visit.to_dict(), | **visit.to_dict(), | ||||
# override with the last update | # override with the last update | ||||
**visit_update.to_dict(), | **visit_update.to_dict(), | ||||
# but keep the date of the creation of the origin visit | # but keep the date of the creation of the origin visit | ||||
"date": visit.date, | "date": visit.date, | ||||
} | } | ||||
▲ Show 20 Lines • Show All 105 Lines • ▼ Show 20 Lines | class InMemoryStorage: | ||||
def _select_random_origin_visit_by_type(self, type: str) -> str: | def _select_random_origin_visit_by_type(self, type: str) -> str: | ||||
while True: | while True: | ||||
url = random.choice(list(self._origin_visits.keys())) | url = random.choice(list(self._origin_visits.keys())) | ||||
random_origin_visits = self._origin_visits[url] | random_origin_visits = self._origin_visits[url] | ||||
if random_origin_visits[0].type == type: | if random_origin_visits[0].type == type: | ||||
return url | return url | ||||
def origin_visit_get_random(self, type: str) -> Optional[Dict[str, Any]]: | def origin_visit_get_random( | ||||
self, type: str | |||||
) -> Optional[Tuple[OriginVisit, OriginVisitStatus]]: | |||||
url = self._select_random_origin_visit_by_type(type) | url = self._select_random_origin_visit_by_type(type) | ||||
random_origin_visits = copy.deepcopy(self._origin_visits[url]) | random_origin_visits = copy.deepcopy(self._origin_visits[url]) | ||||
random_origin_visits.reverse() | random_origin_visits.reverse() | ||||
back_in_the_day = now() - timedelta(weeks=12) # 3 months back | back_in_the_day = now() - timedelta(weeks=12) # 3 months back | ||||
# This should be enough for tests | # This should be enough for tests | ||||
for visit in random_origin_visits: | for visit in random_origin_visits: | ||||
updated_visit = self._origin_visit_get_updated(url, visit.visit) | origin_visit, latest_visit_status = self._origin_visit_status_get_latest( | ||||
assert updated_visit is not None | url, visit.visit | ||||
) | |||||
assert latest_visit_status is not None | |||||
if ( | if ( | ||||
updated_visit["date"] > back_in_the_day | origin_visit.date > back_in_the_day | ||||
and updated_visit["status"] == "full" | and latest_visit_status.status == "full" | ||||
): | ): | ||||
return updated_visit | return origin_visit, latest_visit_status | ||||
else: | else: | ||||
return None | return None | ||||
def stat_counters(self): | def stat_counters(self): | ||||
keys = ( | keys = ( | ||||
"content", | "content", | ||||
"directory", | "directory", | ||||
"origin", | "origin", | ||||
▲ Show 20 Lines • Show All 209 Lines • Show Last 20 Lines |