Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import re | import re | ||||
import bisect | import bisect | ||||
import dateutil | import dateutil | ||||
import collections | import collections | ||||
import copy | import copy | ||||
import datetime | import datetime | ||||
import itertools | import itertools | ||||
import random | import random | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from datetime import timedelta | from datetime import timedelta | ||||
from typing import Any, Dict, List, Mapping, Optional | from typing import Any, Dict, List, Optional | ||||
import attr | import attr | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Content, Directory, Revision, Release, Snapshot, OriginVisit, Origin, | Content, Directory, Revision, Release, Snapshot, OriginVisit, Origin, | ||||
SHA1_SIZE) | SHA1_SIZE) | ||||
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex | from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex | ||||
from swh.objstorage import get_objstorage | from swh.objstorage import get_objstorage | ||||
▲ Show 20 Lines • Show All 1,566 Lines • ▼ Show 20 Lines | |||||
def _select_random_origin_visit_by_type(self, type: str) -> str:
    """Randomly pick the URL of an origin whose first visit has type `type`.

    Only the first entry of each origin's visit list in
    ``self._origin_visits`` is inspected. Origins with an empty visit
    list are never selected.

    Args:
        type: visit type to match against the first visit of each origin

    Returns:
        the URL (key of ``self._origin_visits``) of one matching origin,
        chosen uniformly at random among all matching origins

    Raises:
        IndexError: if no origin matches, including when there are no
            origin visits at all

    """
    # Filter first and draw once: retrying random draws until one matches
    # would never terminate when no origin has the requested type.
    candidate_urls = [
        url
        for url, visits in self._origin_visits.items()
        if visits and visits[0].type == type
    ]
    # random.choice raises IndexError on an empty sequence, which is the
    # failure callers already get for an empty dataset.
    return random.choice(candidate_urls)
def origin_visit_get_random(self, type: str) -> Optional[Dict[str, Any]]:
    """Randomly select one successful origin visit with <type>
    made in the last 3 months.

    An origin is first picked at random among those whose first visit has
    the requested type; its visits are then scanned from the last list
    entry to the first, and the first one found with status 'full' and a
    date within the last 12 weeks is returned.

    Args:
        type: visit type used to select the origin

    Returns:
        dict representing an origin visit, in the same format as
        `origin_visit_get`, or None when there are no origin visits, no
        origin of the requested type, or no recent 'full' visit for the
        selected origin.

    """
    # Decide up front whether any origin qualifies, so that an empty or
    # non-matching dataset yields None instead of depending on how the
    # random selection helper copes with it.
    has_candidate = any(
        visits and visits[0].type == type
        for visits in self._origin_visits.values()
    )
    if not has_candidate:
        return None

    url = self._select_random_origin_visit_by_type(type)
    # NOTE(review): now() is defined elsewhere in this module; presumably
    # it returns a datetime comparable with visit.date -- confirm.
    back_in_the_day = now() - timedelta(weeks=12)  # 3 months back
    # This should be enough for tests
    for visit in reversed(self._origin_visits[url]):
        if visit.date > back_in_the_day and visit.status == 'full':
            # Copy only the selected visit so the returned dict is built
            # from an object independent of the stored one.
            return copy.deepcopy(visit).to_dict()
    return None
def stat_counters(self): | def stat_counters(self): | ||||
"""compute statistics about the number of tuples in various tables | """compute statistics about the number of tuples in various tables | ||||
Returns: | Returns: | ||||
dict: a dictionary mapping textual labels (e.g., content) to | dict: a dictionary mapping textual labels (e.g., content) to | ||||
integer values (e.g., the number of tuples in table content) | integer values (e.g., the number of tuples in table content) | ||||
▲ Show 20 Lines • Show All 225 Lines • Show Last 20 Lines |