Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
# Copyright (C) 2015-2019 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import re | import re | ||||
import bisect | import bisect | ||||
import dateutil | import dateutil | ||||
import collections | import collections | ||||
import copy | import copy | ||||
import datetime | import datetime | ||||
import itertools | import itertools | ||||
import random | import random | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from datetime import timedelta | from datetime import timedelta | ||||
from typing import Any, Dict, Mapping, Optional | from typing import Any, Dict, List, Mapping, Optional | ||||
import attr | import attr | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Content, Directory, Revision, Release, Snapshot, OriginVisit, Origin, | Content, Directory, Revision, Release, Snapshot, OriginVisit, Origin, | ||||
SHA1_SIZE) | SHA1_SIZE) | ||||
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex | from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex | ||||
from swh.objstorage import get_objstorage | from swh.objstorage import get_objstorage | ||||
▲ Show 20 Lines • Show All 316 Lines • ▼ Show 20 Lines | def content_get_partition( | ||||
result2 = { | result2 = { | ||||
'contents': result['contents'], | 'contents': result['contents'], | ||||
'next_page_token': None, | 'next_page_token': None, | ||||
} | } | ||||
if result['next']: | if result['next']: | ||||
result2['next_page_token'] = hash_to_hex(result['next']) | result2['next_page_token'] = hash_to_hex(result['next']) | ||||
return result2 | return result2 | ||||
def content_get_metadata(self, content): | def content_get_metadata( | ||||
self, contents: List[bytes]) -> Dict[bytes, List[Dict]]: | |||||
"""Retrieve content metadata in bulk | """Retrieve content metadata in bulk | ||||
Args: | Args: | ||||
content: iterable of content identifiers (sha1) | content: iterable of content identifiers (sha1) | ||||
Returns: | Returns: | ||||
an iterable with content metadata corresponding to the given ids | a dict with keys the content's sha1 and the associated value | ||||
either the existing content's metadata or None if the content does | |||||
not exist. | |||||
""" | """ | ||||
# FIXME: the return value should be a mapping from search key to found | result: Dict = {sha1: [] for sha1 in contents} | ||||
# content*s* | for sha1 in contents: | ||||
for sha1 in content: | |||||
if sha1 in self._content_indexes['sha1']: | if sha1 in self._content_indexes['sha1']: | ||||
objs = self._content_indexes['sha1'][sha1] | objs = self._content_indexes['sha1'][sha1] | ||||
# FIXME: rather than selecting one of the objects with that | # only 1 element as content_add_metadata would have raised a | ||||
ardumont: This can actually be fixed in that diff now as @vlorentz mentioned.
In progress ;) | |||||
# hash, we should return all of them. See: | # hash collision otherwise | ||||
# https://forge.softwareheritage.org/D645?id=1994#inline-3389 | for key in objs: | ||||
key = random.sample(objs, 1)[0] | |||||
d = self._contents[key].to_dict() | d = self._contents[key].to_dict() | ||||
del d['ctime'] | del d['ctime'] | ||||
yield d | result[sha1].append(d) | ||||
else: | return result | ||||
Done · Inline Actions · ardumont: Is it better to skip it or to `yield None` here? | |||||
# FIXME: should really be None | |||||
yield { | |||||
'sha1': sha1, | |||||
'sha1_git': None, | |||||
'sha256': None, | |||||
'blake2s256': None, | |||||
'length': None, | |||||
'status': None, | |||||
} | |||||
def content_find(self, content): | def content_find(self, content): | ||||
if not set(content).intersection(DEFAULT_ALGORITHMS): | if not set(content).intersection(DEFAULT_ALGORITHMS): | ||||
raise ValueError('content keys must contain at least one of: ' | raise ValueError('content keys must contain at least one of: ' | ||||
'%s' % ', '.join(sorted(DEFAULT_ALGORITHMS))) | '%s' % ', '.join(sorted(DEFAULT_ALGORITHMS))) | ||||
found = [] | found = [] | ||||
for algo in DEFAULT_ALGORITHMS: | for algo in DEFAULT_ALGORITHMS: | ||||
hash = content.get(algo) | hash = content.get(algo) | ||||
▲ Show 20 Lines • Show All 1,467 Lines • Show Last 20 Lines |
This can actually be fixed in that diff now as @vlorentz mentioned.
In progress ;)