diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -33,6 +33,8 @@
     def __init__(self, journal_writer=None):
         self._contents = {}
         self._content_indexes = defaultdict(lambda: defaultdict(set))
+        self._skipped_contents = {}
+        self._skipped_content_indexes = defaultdict(lambda: defaultdict(set))
 
         self.reset()
 
@@ -347,6 +349,26 @@
             if content not in self._content_indexes['sha1']:
                 yield content
 
+    def skipped_content_missing(self, contents):
+        """List the contents that have no skipped content entry yet
+
+        Args:
+            contents: Iterable of content dicts, mapping each hash
+                algorithm name to the hash of the content
+
+        Returns:
+            iterable: the content dicts for which at least one hash is
+            absent from the skipped content indexes
+        """
+
+        for content in contents:
+            # _content_key_algorithm returns one (hash, algorithm) pair per
+            # algorithm, so the pairs must be iterated, not unpacked at once
+            for key, algorithm in self._content_key_algorithm(content):
+                if key not in self._skipped_content_indexes[algorithm]:
+                    yield content
+                    break
+
     def directory_add(self, directories):
         """Add directories to the storage
 
@@ -1656,6 +1678,12 @@
         return tuple(content.get(key) for key in sorted(DEFAULT_ALGORITHMS))
 
     @staticmethod
+    def _content_key_algorithm(content):
+        """Stable (hash, algorithm) pairs for a content, one per algorithm"""
+        return tuple((content.get(key), key)
+                     for key in sorted(DEFAULT_ALGORITHMS))
+
+    @staticmethod
     def _tool_key(tool):
         return '%r %r %r' % (tool['name'], tool['version'],
                              tuple(sorted(tool['configuration'].items())))