Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show All 12 Lines | |||||
import random | import random | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from datetime import timedelta | from datetime import timedelta | ||||
from typing import Any, Dict, Mapping, Optional | from typing import Any, Dict, Mapping, Optional | ||||
import attr | import attr | ||||
from swh.model.model import \ | from swh.model.model import ( | ||||
Content, Directory, Revision, Release, Snapshot, OriginVisit, Origin | Content, Directory, Revision, Release, Snapshot, OriginVisit, Origin, | ||||
from swh.model.hashutil import DEFAULT_ALGORITHMS | SHA1_SIZE) | ||||
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex | |||||
from swh.objstorage import get_objstorage | from swh.objstorage import get_objstorage | ||||
from swh.objstorage.exc import ObjNotFoundError | from swh.objstorage.exc import ObjNotFoundError | ||||
from .storage import get_journal_writer | from .storage import get_journal_writer | ||||
from .converters import origin_url_to_sha1 | from .converters import origin_url_to_sha1 | ||||
from .utils import get_partition_bounds_bytes | |||||
# Max block size of contents to return | # Max block size of contents to return | ||||
BULK_BLOCK_CONTENT_LEN_MAX = 10000 | BULK_BLOCK_CONTENT_LEN_MAX = 10000 | ||||
def now(): | def now(): | ||||
return datetime.datetime.now(tz=datetime.timezone.utc) | return datetime.datetime.now(tz=datetime.timezone.utc) | ||||
▲ Show 20 Lines • Show All 264 Lines • ▼ Show 20 Lines | def content_get_range(self, start, end, limit=1000, db=None, cur=None): | ||||
next_content = sha1 | next_content = sha1 | ||||
break | break | ||||
matched.append(self._contents[key].to_dict()) | matched.append(self._contents[key].to_dict()) | ||||
return { | return { | ||||
'contents': matched, | 'contents': matched, | ||||
'next': next_content, | 'next': next_content, | ||||
} | } | ||||
def content_get_partition( | |||||
ardumont: type? | |||||
self, partition_id: int, nb_partitions: int, limit: int = 1000, | |||||
page_token: str = None): | |||||
Done Inline Actions: must be in [0, nb_partitions[ ? ardumont: `must be in [0, nb_partitions[`
or
`must be in [0, nb_partitions - 1]`
? | |||||
"""Splits contents into nb_partitions, and returns one of these based on | |||||
partition_id (which must be in [0, nb_partitions-1]) | |||||
Done Inline Actions: or the result order. ardumont: or the result order. | |||||
There is no guarantee on how the partitioning is done, or the | |||||
result order. | |||||
Args: | |||||
partition_id (int): index of the partition to fetch | |||||
nb_partitions (int): total number of partitions to split into | |||||
limit (int): Limit result (default to 1000) | |||||
page_token (Optional[str]): opaque token used for pagination. | |||||
Returns: | |||||
a dict with keys: | |||||
- **contents** (List[dict]): iterable of contents in the partition. | |||||
Done Inline Actions: retrieving ardumont: retrieving | |||||
- **next_page_token** (Optional[str]): opaque token to be used as | |||||
`page_token` for retrieving the next page. If None, there are | |||||
no more pages to gather. | |||||
""" | |||||
if limit is None: | |||||
raise ValueError('Development error: limit should not be None') | |||||
(start, end) = get_partition_bounds_bytes( | |||||
partition_id, nb_partitions, SHA1_SIZE) | |||||
if page_token: | |||||
start = hash_to_bytes(page_token) | |||||
if end is None: | |||||
end = b'\xff'*SHA1_SIZE | |||||
result = self.content_get_range(start, end, limit) | |||||
result2 = { | |||||
'contents': result['contents'], | |||||
'next_page_token': None, | |||||
} | |||||
if result['next']: | |||||
result2['next_page_token'] = hash_to_hex(result['next']) | |||||
return result2 | |||||
def content_get_metadata(self, content): | def content_get_metadata(self, content): | ||||
"""Retrieve content metadata in bulk | """Retrieve content metadata in bulk | ||||
Args: | Args: | ||||
content: iterable of content identifiers (sha1) | content: iterable of content identifiers (sha1) | ||||
Returns: | Returns: | ||||
an iterable with content metadata corresponding to the given ids | an iterable with content metadata corresponding to the given ids | ||||
▲ Show 20 Lines • Show All 1,498 Lines • Show Last 20 Lines |
type?