Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show First 20 Lines • Show All 1,086 Lines • ▼ Show 20 Lines | def origin_get_by_sha1(self, sha1s): | ||||
by :meth:`swh.storage.in_memory.Storage.origin_get`, or None if an | by :meth:`swh.storage.in_memory.Storage.origin_get`, or None if an | ||||
origin matching the sha1 is not found. | origin matching the sha1 is not found. | ||||
""" | """ | ||||
return [ | return [ | ||||
self._convert_origin(self._origins_by_sha1.get(sha1)) | self._convert_origin(self._origins_by_sha1.get(sha1)) | ||||
for sha1 in sha1s | for sha1 in sha1s | ||||
] | ] | ||||
def _select_random_origin_by_type(self, type: str) -> str:
    """Pick at random the url of an origin whose first visit has ``type``.

    Args:
        type: visit type compared against the ``type`` attribute of the
            first visit stored for each origin in ``self._origin_visits``.

    Returns:
        The url (a key of ``self._origin_visits``) of the selected origin.

    Raises:
        LookupError: if no origin has a first visit of the given type.

    """
    # Filter first, then draw once: redrawing at random until a match is
    # found never terminates when no origin has the requested type, and
    # indexing [0] blindly fails on an origin with no visit recorded.
    candidates = [
        url for url, visits in self._origin_visits.items()
        if visits and visits[0].type == type
    ]
    if not candidates:
        raise LookupError('No origin with visit type %r' % (type,))
    return random.choice(candidates)

def origin_visit_get_random(self, type: str) -> Mapping[str, Any]:
    """Randomly select one origin with <type> and return one of its
    successful visits made in the last 3 months.

    An origin is drawn at random among those whose first visit has the
    given type; its visits are then scanned from the end of the stored
    list and the first one with status 'full' and a date within the last
    12 weeks is returned.

    Args:
        type: visit type of the origin to select.

    Returns:
        The selected visit as a dict (``visit.to_dict()``), or an empty
        dict when the dataset is empty, when no origin has the requested
        type, or when the drawn origin has no matching visit.

    """
    try:
        url = self._select_random_origin_by_type(type)
    except LookupError:  # empty dataset, or no origin of that type
        return {}

    back_in_the_day = now() - timedelta(weeks=12)  # 3 months back
    # This should be enough for tests
    for visit in reversed(self._origin_visits[url]):
        if visit.date > back_in_the_day and visit.status == 'full':
            # Convert a copy so the returned dict is built from data
            # detached from the stored visit.
            return copy.deepcopy(visit).to_dict()
    return {}
def origin_get_range(self, origin_from=1, origin_count=100): | def origin_get_range(self, origin_from=1, origin_count=100): | ||||
"""Retrieve ``origin_count`` origins whose ids are greater | """Retrieve ``origin_count`` origins whose ids are greater | ||||
or equal than ``origin_from``. | or equal than ``origin_from``. | ||||
Origins are sorted by id before retrieving them. | Origins are sorted by id before retrieving them. | ||||
Args: | Args: | ||||
origin_from (int): the minimum id of origins to retrieve | origin_from (int): the minimum id of origins to retrieve | ||||
▲ Show 20 Lines • Show All 369 Lines • ▼ Show 20 Lines | def origin_visit_get_latest( | ||||
if require_snapshot: | if require_snapshot: | ||||
visits = [visit for visit in visits | visits = [visit for visit in visits | ||||
if visit.snapshot] | if visit.snapshot] | ||||
visit = max( | visit = max( | ||||
visits, key=lambda v: (v.date, v.visit), default=None) | visits, key=lambda v: (v.date, v.visit), default=None) | ||||
return self._convert_visit(visit) | return self._convert_visit(visit) | ||||
def _select_random_origin_visit_by_type(self, type: str) -> str:
    """Pick at random the url of an origin whose first visit has ``type``.

    Args:
        type: visit type compared against the ``type`` attribute of the
            first visit stored for each origin in ``self._origin_visits``.

    Returns:
        The url (a key of ``self._origin_visits``) of the selected origin.

    Raises:
        LookupError: if no origin has a first visit of the given type.

    """
    # Build the list of eligible origins up front and draw once from it:
    # a retry-until-match loop spins forever when nothing matches, and an
    # origin without any visit cannot be indexed at [0].
    matching_urls = [
        url for url, visits in self._origin_visits.items()
        if visits and visits[0].type == type
    ]
    if not matching_urls:
        raise LookupError('No origin with visit type %r' % (type,))
    return random.choice(matching_urls)

def origin_visit_get_random(self, type: str) -> Mapping[str, Any]:
    """Randomly select one successful origin visit with <type>
    made in the last 3 months.

    An origin is drawn at random among those whose first visit has the
    given type; its visits are then scanned from the end of the stored
    list and the first one with status 'full' and a date within the last
    12 weeks is returned.

    Args:
        type: visit type of the origin to select.

    Returns:
        dict representing an origin visit, in the same format as
        `origin_visit_get`, or an empty dict when the dataset is empty,
        when no origin has the requested type, or when the drawn origin
        has no matching visit.

    """
    try:
        url = self._select_random_origin_visit_by_type(type)
    except LookupError:  # empty dataset, or no origin of that type
        return {}

    back_in_the_day = now() - timedelta(weeks=12)  # 3 months back
    # This should be enough for tests
    for visit in reversed(self._origin_visits[url]):
        if visit.date > back_in_the_day and visit.status == 'full':
            # Convert a copy so the returned dict is built from data
            # detached from the stored visit.
            return copy.deepcopy(visit).to_dict()
    return {}
def stat_counters(self): | def stat_counters(self): | ||||
"""compute statistics about the number of tuples in various tables | """compute statistics about the number of tuples in various tables | ||||
Returns: | Returns: | ||||
dict: a dictionary mapping textual labels (e.g., content) to | dict: a dictionary mapping textual labels (e.g., content) to | ||||
integer values (e.g., the number of tuples in table content) | integer values (e.g., the number of tuples in table content) | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 224 Lines • Show Last 20 Lines |