Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show First 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | def _content_add(self, contents, with_data): | ||||
count_content_added = 0 | count_content_added = 0 | ||||
count_content_bytes_added = 0 | count_content_bytes_added = 0 | ||||
for content in contents: | for content in contents: | ||||
key = self._content_key(content) | key = self._content_key(content) | ||||
if key in self._contents: | if key in self._contents: | ||||
continue | continue | ||||
for algorithm in DEFAULT_ALGORITHMS: | for algorithm in DEFAULT_ALGORITHMS: | ||||
if content[algorithm] in self._content_indexes[algorithm]: | if content[algorithm] in self._content_indexes[algorithm]\ | ||||
and (algorithm not in {'blake2s256', 'sha256'}): | |||||
from . import HashCollision | from . import HashCollision | ||||
raise HashCollision(algorithm, content[algorithm], key) | raise HashCollision(algorithm, content[algorithm], key) | ||||
for algorithm in DEFAULT_ALGORITHMS: | for algorithm in DEFAULT_ALGORITHMS: | ||||
self._content_indexes[algorithm][content[algorithm]].add(key) | self._content_indexes[algorithm][content[algorithm]].add(key) | ||||
self._objects[content['sha1_git']].append( | self._objects[content['sha1_git']].append( | ||||
('content', content['sha1'])) | ('content', content['sha1'])) | ||||
self._contents[key] = copy.deepcopy(content) | self._contents[key] = copy.deepcopy(content) | ||||
bisect.insort(self._sorted_sha1s, content['sha1']) | bisect.insort(self._sorted_sha1s, content['sha1']) | ||||
▲ Show 20 Lines • Show All 197 Lines • ▼ Show 20 Lines | def content_find(self, content): | ||||
'%s' % ', '.join(sorted(DEFAULT_ALGORITHMS))) | '%s' % ', '.join(sorted(DEFAULT_ALGORITHMS))) | ||||
found = [] | found = [] | ||||
for algo in DEFAULT_ALGORITHMS: | for algo in DEFAULT_ALGORITHMS: | ||||
hash = content.get(algo) | hash = content.get(algo) | ||||
if hash and hash in self._content_indexes[algo]: | if hash and hash in self._content_indexes[algo]: | ||||
found.append(self._content_indexes[algo][hash]) | found.append(self._content_indexes[algo][hash]) | ||||
if not found: | if not found: | ||||
return | return | ||||
keys = list(set.intersection(*found)) | keys = list(set.intersection(*found)) | ||||
return copy.deepcopy([self._contents[key] for key in keys]) | |||||
vlorentz: You can remove that FIXME, because you fixed it :) | |||||
# FIXME: should really be a list of all the objects found | |||||
return copy.deepcopy(self._contents[keys[0]]) | |||||
def content_missing(self, contents, key_hash='sha1'): | def content_missing(self, contents, key_hash='sha1'): | ||||
"""List content missing from storage | """List content missing from storage | ||||
Args: | Args: | ||||
contents ([dict]): iterable of dictionaries whose keys are | contents ([dict]): iterable of dictionaries whose keys are | ||||
either 'length' or an item of | either 'length' or an item of | ||||
:data:`swh.model.hashutil.ALGORITHMS`; | :data:`swh.model.hashutil.ALGORITHMS`; | ||||
Show All 9 Lines | def content_missing(self, contents, key_hash='sha1'): | ||||
""" | """ | ||||
for content in contents: | for content in contents: | ||||
for (algo, hash_) in content.items(): | for (algo, hash_) in content.items(): | ||||
if algo not in DEFAULT_ALGORITHMS: | if algo not in DEFAULT_ALGORITHMS: | ||||
continue | continue | ||||
if hash_ not in self._content_indexes.get(algo, []): | if hash_ not in self._content_indexes.get(algo, []): | ||||
yield content[key_hash] | yield content[key_hash] | ||||
break | break | ||||
else: | else: | ||||
# content_find cannot return None here, because we checked | for result in self.content_find(content): | ||||
Done · Inline Actions · This comment is no longer relevant · vlorentz: This comment is no longer relevant | |||||
# above that there is a content with matching hashes. | if result['status'] == 'missing': | ||||
if self.content_find(content)['status'] == 'missing': | |||||
yield content[key_hash] | yield content[key_hash] | ||||
def content_missing_per_sha1(self, contents): | def content_missing_per_sha1(self, contents): | ||||
"""List content missing from storage based only on sha1. | """List content missing from storage based only on sha1. | ||||
Args: | Args: | ||||
contents: Iterable of sha1 to check for absence. | contents: Iterable of sha1 to check for absence. | ||||
Returns: | Returns: | ||||
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | def _join_dentry_to_content(self, dentry): | ||||
'sha1', | 'sha1', | ||||
'sha1_git', | 'sha1_git', | ||||
'sha256', | 'sha256', | ||||
'length', | 'length', | ||||
) | ) | ||||
ret = dict.fromkeys(keys) | ret = dict.fromkeys(keys) | ||||
ret.update(dentry) | ret.update(dentry) | ||||
if ret['type'] == 'file': | if ret['type'] == 'file': | ||||
# TODO: Make it able to handle more than one content | |||||
content = self.content_find({'sha1_git': ret['target']}) | content = self.content_find({'sha1_git': ret['target']}) | ||||
if content: | if content: | ||||
content = content[0] | |||||
for key in keys: | for key in keys: | ||||
ret[key] = content[key] | ret[key] = content[key] | ||||
return ret | return ret | ||||
def _directory_ls(self, directory_id, recursive, prefix=b''): | def _directory_ls(self, directory_id, recursive, prefix=b''): | ||||
if directory_id in self._directories: | if directory_id in self._directories: | ||||
for entry in self._directories[directory_id]['entries']: | for entry in self._directories[directory_id]['entries']: | ||||
ret = self._join_dentry_to_content(entry) | ret = self._join_dentry_to_content(entry) | ||||
▲ Show 20 Lines • Show All 1,121 Lines • Show Last 20 Lines |
You can remove that FIXME, because you fixed it :)