diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -321,7 +321,9 @@
         contents = [attr.evolve(c, ctime=now()) for c in content]
         return self._skipped_content_add(contents)
 
-    def skipped_content_missing(self, contents):
+    def skipped_content_missing(
+        self, contents: List[Dict[str, Any]]
+    ) -> Iterable[Dict[str, Any]]:
         for content in contents:
             if not self._cql_runner.skipped_content_get_from_pk(content):
                 yield {algo: content[algo] for algo in DEFAULT_ALGORITHMS}
diff --git a/swh/storage/filter.py b/swh/storage/filter.py
--- a/swh/storage/filter.py
+++ b/swh/storage/filter.py
@@ -11,6 +11,7 @@
     SkippedContent,
     Directory,
     Revision,
+    Sha1Git,
 )
 
 from swh.storage import get_storage
@@ -81,7 +82,7 @@
 
     def _filter_missing_skipped_contents(
         self, contents: List[SkippedContent]
-    ) -> Set[bytes]:
+    ) -> Set[Sha1Git]:
         """Return only the content keys missing from swh
 
         Args:
@@ -89,16 +90,14 @@
                 storage
 
         """
-        missing_contents = []
-        for content in contents:
-            if content.sha1_git is None:
-                continue
-            missing_contents.append(content.hashes())
+        missing_contents = [c.hashes() for c in contents if c.sha1_git is not None]
 
-        return {
-            c.get("sha1_git")
-            for c in self.storage.skipped_content_missing(missing_contents)
-        }
+        ids = set()
+        for c in self.storage.skipped_content_missing(missing_contents):
+            if c is None or c.get("sha1_git") is None:
+                continue
+            ids.add(c["sha1_git"])
+        return ids
 
     def _filter_missing_ids(self, object_type: str, ids: Iterable[bytes]) -> Set[bytes]:
         """Filter missing ids from the storage for a given object type.
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -398,7 +398,9 @@
         content = [attr.evolve(c, ctime=now()) for c in content]
         return self._skipped_content_add(content)
 
-    def skipped_content_missing(self, contents):
+    def skipped_content_missing(
+        self, contents: List[Dict[str, Any]]
+    ) -> Iterable[Dict[str, Any]]:
         for content in contents:
             matches = list(self._skipped_contents.values())
             for (algorithm, key) in self._content_key(content):
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -366,15 +366,17 @@
         ...
 
     @remote_api_endpoint("content/skipped/missing")
-    def skipped_content_missing(self, contents):
-        """List skipped_content missing from storage
+    def skipped_content_missing(
+        self, contents: List[Dict[str, Any]]
+    ) -> Iterable[Dict[str, Any]]:
+        """List skipped contents missing from storage.
 
         Args:
-            content: iterable of dictionaries containing the data for each
+            contents: iterable of dictionaries containing the data for each
                 checksum algorithm.
 
         Returns:
-            iterable: missing signatures
+            Iterable of missing skipped contents as dict
 
         """
         ...
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -457,7 +457,9 @@
 
     @timed
     @db_transaction_generator()
-    def skipped_content_missing(self, contents, db=None, cur=None):
+    def skipped_content_missing(
+        self, contents: List[Dict[str, Any]], db=None, cur=None
+    ) -> Iterable[Dict[str, Any]]:
         contents = list(contents)
         for content in db.skipped_content_missing(contents, cur):
             yield dict(zip(db.content_hash_keys, content))