Differential D2522 Diff 8964 swh/storage/in_memory.py

Changeset View

Standalone View

swh/storage/in_memory.py

# Copyright (C) 2015-2019 The Software Heritage developers		# Copyright (C) 2015-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution		# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version		# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information		# See top-level LICENSE file for more information

import re		import re
import bisect		import bisect
import dateutil		import dateutil
import collections		import collections
import copy		import copy
import datetime		import datetime
import itertools		import itertools
import random		import random

from collections import defaultdict		from collections import defaultdict
from datetime import timedelta		from datetime import timedelta
from typing import Any, Dict, Mapping, Optional		from typing import Any, Dict, List, Mapping, Optional

import attr		import attr

from swh.model.model import (		from swh.model.model import (
Content, Directory, Revision, Release, Snapshot, OriginVisit, Origin,		Content, Directory, Revision, Release, Snapshot, OriginVisit, Origin,
SHA1_SIZE)		SHA1_SIZE)
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex		from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
from swh.objstorage import get_objstorage		from swh.objstorage import get_objstorage
▲ Show 20 Lines • Show All 316 Lines • ▼ Show 20 Lines	def content_get_partition(
result2 = {		result2 = {
'contents': result['contents'],		'contents': result['contents'],
'next_page_token': None,		'next_page_token': None,
}		}
if result['next']:		if result['next']:
result2['next_page_token'] = hash_to_hex(result['next'])		result2['next_page_token'] = hash_to_hex(result['next'])
return result2		return result2

def content_get_metadata(
        self, contents: List[bytes]) -> Dict[bytes, List[Dict]]:
    """Retrieve content metadata in bulk

    Args:
        contents: iterable of content identifiers (sha1)

    Returns:
        a dict whose keys are the requested sha1s and whose values are the
        list of metadata dicts (the content's ``to_dict()`` output without
        the ``ctime`` key) of every content indexed under that sha1. The
        list is empty when no content with that sha1 exists.

    """
    result: Dict[bytes, List[Dict]] = {sha1: [] for sha1 in contents}
    sha1_index = self._content_indexes['sha1']
    # Walk the (deduplicated) keys of `result` rather than `contents`: a
    # sha1 requested several times must not have its metadata appended
    # once per occurrence.
    for sha1, found in result.items():
        if sha1 not in sha1_index:
            # unknown content: keep the empty list
            continue
        # NOTE(review): the change's author expects a single key here
        # (content_add_metadata presumably rejects hash collisions -- to
        # confirm); all keys are returned regardless.
        for key in sha1_index[sha1]:
            d = self._contents[key].to_dict()
            del d['ctime']
            found.append(d)
    return result

def content_find(self, content):		def content_find(self, content):
if not set(content).intersection(DEFAULT_ALGORITHMS):		if not set(content).intersection(DEFAULT_ALGORITHMS):
raise ValueError('content keys must contain at least one of: '		raise ValueError('content keys must contain at least one of: '
'%s' % ', '.join(sorted(DEFAULT_ALGORITHMS)))		'%s' % ', '.join(sorted(DEFAULT_ALGORITHMS)))
found = []		found = []
for algo in DEFAULT_ALGORITHMS:		for algo in DEFAULT_ALGORITHMS:
hash = content.get(algo)		hash = content.get(algo)
▲ Show 20 Lines • Show All 1,467 Lines • Show Last 20 Lines