Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/filter.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from typing import Dict, Iterable, List, Set | from typing import Dict, Iterable, List, Set | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Content, | Content, | ||||
SkippedContent, | SkippedContent, | ||||
Directory, | Directory, | ||||
Revision, | Revision, | ||||
Sha1Git, | |||||
) | ) | ||||
from swh.storage import get_storage | from swh.storage import get_storage | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
class FilteringProxyStorage: | class FilteringProxyStorage: | ||||
"""Filtering Storage implementation. This is in charge of transparently | """Filtering Storage implementation. This is in charge of transparently | ||||
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | def _filter_missing_contents(self, contents: List[Content]) -> Set[bytes]: | ||||
missing_contents = [] | missing_contents = [] | ||||
for content in contents: | for content in contents: | ||||
missing_contents.append(content.hashes()) | missing_contents.append(content.hashes()) | ||||
return set(self.storage.content_missing(missing_contents, key_hash="sha256",)) | return set(self.storage.content_missing(missing_contents, key_hash="sha256",)) | ||||
def _filter_missing_skipped_contents( | def _filter_missing_skipped_contents( | ||||
self, contents: List[SkippedContent] | self, contents: List[SkippedContent] | ||||
) -> Set[bytes]: | ) -> Set[Sha1Git]: | ||||
"""Return only the content keys missing from swh | """Return only the content keys missing from swh | ||||
Args: | Args: | ||||
content_hashes: List of sha1_git to check for existence in swh | content_hashes: List of sha1_git to check for existence in swh | ||||
storage | storage | ||||
""" | """ | ||||
missing_contents = [] | missing_contents = [c.hashes() for c in contents if c.sha1_git is not None] | ||||
for content in contents: | |||||
if content.sha1_git is None: | |||||
continue | |||||
missing_contents.append(content.hashes()) | |||||
return { | ids = set() | ||||
c.get("sha1_git") | for c in self.storage.skipped_content_missing(missing_contents): | ||||
for c in self.storage.skipped_content_missing(missing_contents) | if c is None or c.get("sha1_git") is None: | ||||
} | continue | ||||
ids.add(c["sha1_git"]) | |||||
return ids | |||||
def _filter_missing_ids(self, object_type: str, ids: Iterable[bytes]) -> Set[bytes]: | def _filter_missing_ids(self, object_type: str, ids: Iterable[bytes]) -> Set[bytes]: | ||||
"""Filter missing ids from the storage for a given object type. | """Filter missing ids from the storage for a given object type. | ||||
Args: | Args: | ||||
object_type: object type to use {revision, directory} | object_type: object type to use {revision, directory} | ||||
ids: List of object_type ids | ids: List of object_type ids | ||||
Show All 15 Lines |