diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -12,7 +12,7 @@
 import psycopg2
 import psycopg2.pool
 
-from swh.core.db.common import db_transaction, db_transaction_generator
+from swh.core.db.common import db_transaction
 from swh.model.hashutil import hash_to_bytes, hash_to_hex
 from swh.model.model import SHA1_SIZE
 from swh.storage.exc import StorageDBError
@@ -143,12 +143,11 @@
         return cur.fetchone()[0]
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
     def content_mimetype_missing(
         self, mimetypes: Iterable[Dict], db=None, cur=None
-    ) -> Iterable[Tuple[Sha1, int]]:
-        for obj in db.content_mimetype_missing_from_list(mimetypes, cur):
-            yield obj[0]
+    ) -> List[Tuple[Sha1, int]]:
+        return [obj[0] for obj in db.content_mimetype_missing_from_list(mimetypes, cur)]
 
     @timed
     @db_transaction()
@@ -266,26 +265,29 @@
         return {"content_mimetype:add": count}
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
     def content_mimetype_get(
         self, ids: Iterable[Sha1], db=None, cur=None
-    ) -> Iterable[ContentMimetypeRow]:
-        for c in db.content_mimetype_get_from_list(ids, cur):
-            yield ContentMimetypeRow.from_dict(
+    ) -> List[ContentMimetypeRow]:
+        return [
+            ContentMimetypeRow.from_dict(
                 converters.db_to_mimetype(dict(zip(db.content_mimetype_cols, c)))
             )
+            for c in db.content_mimetype_get_from_list(ids, cur)
+        ]
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
     def content_language_missing(self, languages, db=None, cur=None):
-        for obj in db.content_language_missing_from_list(languages, cur):
-            yield obj[0]
+        return [obj[0] for obj in db.content_language_missing_from_list(languages, cur)]
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
     def content_language_get(self, ids, db=None, cur=None):
-        for c in db.content_language_get_from_list(ids, cur):
-            yield converters.db_to_language(dict(zip(db.content_language_cols, c)))
+        return [
+            converters.db_to_language(dict(zip(db.content_language_cols, c)))
+            for c in db.content_language_get_from_list(ids, cur)
+        ]
 
     @timed
     @process_metrics
@@ -315,16 +317,17 @@
         return {"content_language:add": count}
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
     def content_ctags_missing(self, ctags, db=None, cur=None):
-        for obj in db.content_ctags_missing_from_list(ctags, cur):
-            yield obj[0]
+        return [obj[0] for obj in db.content_ctags_missing_from_list(ctags, cur)]
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
     def content_ctags_get(self, ids, db=None, cur=None):
-        for c in db.content_ctags_get_from_list(ids, cur):
-            yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, c)))
+        return [
+            converters.db_to_ctags(dict(zip(db.content_ctags_cols, c)))
+            for c in db.content_ctags_get_from_list(ids, cur)
+        ]
 
     @timed
     @process_metrics
@@ -348,24 +351,28 @@
         return {"content_ctags:add": count}
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
     def content_ctags_search(
         self, expression, limit=10, last_sha1=None, db=None, cur=None
     ):
-        for obj in db.content_ctags_search(expression, last_sha1, limit, cur=cur):
-            yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, obj)))
+        return [
+            converters.db_to_ctags(dict(zip(db.content_ctags_cols, obj)))
+            for obj in db.content_ctags_search(expression, last_sha1, limit, cur=cur)
+        ]
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
     def content_fossology_license_get(
         self, ids: Iterable[Sha1], db=None, cur=None
-    ) -> Iterable[ContentLicenseRow]:
-        for c in db.content_fossology_license_get_from_list(ids, cur):
-            yield ContentLicenseRow.from_dict(
+    ) -> List[ContentLicenseRow]:
+        return [
+            ContentLicenseRow.from_dict(
                 converters.db_to_fossology_license(
                     dict(zip(db.content_fossology_license_cols, c))
                 )
             )
+            for c in db.content_fossology_license_get_from_list(ids, cur)
+        ]
 
     @timed
     @process_metrics
@@ -414,16 +421,17 @@
         )
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
     def content_metadata_missing(self, metadata, db=None, cur=None):
-        for obj in db.content_metadata_missing_from_list(metadata, cur):
-            yield obj[0]
+        return [obj[0] for obj in db.content_metadata_missing_from_list(metadata, cur)]
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
    def content_metadata_get(self, ids, db=None, cur=None):
-        for c in db.content_metadata_get_from_list(ids, cur):
-            yield converters.db_to_metadata(dict(zip(db.content_metadata_cols, c)))
+        return [
+            converters.db_to_metadata(dict(zip(db.content_metadata_cols, c)))
+            for c in db.content_metadata_get_from_list(ids, cur)
+        ]
 
     @timed
     @process_metrics
@@ -448,18 +456,20 @@
         }
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
     def revision_intrinsic_metadata_missing(self, metadata, db=None, cur=None):
-        for obj in db.revision_intrinsic_metadata_missing_from_list(metadata, cur):
-            yield obj[0]
+        return [
+            obj[0]
+            for obj in db.revision_intrinsic_metadata_missing_from_list(metadata, cur)
+        ]
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
     def revision_intrinsic_metadata_get(self, ids, db=None, cur=None):
-        for c in db.revision_intrinsic_metadata_get_from_list(ids, cur):
-            yield converters.db_to_metadata(
-                dict(zip(db.revision_intrinsic_metadata_cols, c))
-            )
+        return [
+            converters.db_to_metadata(dict(zip(db.revision_intrinsic_metadata_cols, c)))
+            for c in db.revision_intrinsic_metadata_get_from_list(ids, cur)
+        ]
 
     @timed
     @process_metrics
@@ -493,12 +503,12 @@
         return {"revision_intrinsic_metadata:del": count}
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
     def origin_intrinsic_metadata_get(self, ids, db=None, cur=None):
-        for c in db.origin_intrinsic_metadata_get_from_list(ids, cur):
-            yield converters.db_to_metadata(
-                dict(zip(db.origin_intrinsic_metadata_cols, c))
-            )
+        return [
+            converters.db_to_metadata(dict(zip(db.origin_intrinsic_metadata_cols, c)))
+            for c in db.origin_intrinsic_metadata_get_from_list(ids, cur)
+        ]
 
     @timed
     @process_metrics
@@ -534,16 +544,16 @@
         }
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
     def origin_intrinsic_metadata_search_fulltext(
         self, conjunction, limit=100, db=None, cur=None
     ):
-        for c in db.origin_intrinsic_metadata_search_fulltext(
-            conjunction, limit=limit, cur=cur
-        ):
-            yield converters.db_to_metadata(
-                dict(zip(db.origin_intrinsic_metadata_cols, c))
+        return [
+            converters.db_to_metadata(dict(zip(db.origin_intrinsic_metadata_cols, c)))
+            for c in db.origin_intrinsic_metadata_search_fulltext(
+                conjunction, limit=limit, cur=cur
             )
+        ]
 
     @timed
     @db_transaction()
@@ -616,7 +626,7 @@
         }
 
     @timed
-    @db_transaction_generator()
+    @db_transaction()
     def indexer_configuration_add(self, tools, db=None, cur=None):
         db.mktemp_indexer_configuration(cur)
         db.copy_to(
@@ -627,13 +637,13 @@
         )
         tools = db.indexer_configuration_add_from_temp(cur)
 
-        count = 0
-        for line in tools:
-            yield dict(zip(db.indexer_configuration_cols, line))
-            count += 1
+        results = [dict(zip(db.indexer_configuration_cols, line)) for line in tools]
         send_metric(
-            "indexer_configuration:add", count, method_name="indexer_configuration_add"
+            "indexer_configuration:add",
+            len(results),
+            method_name="indexer_configuration_add",
         )
+        return results
 
     @timed
     @db_transaction()
diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
--- a/swh/indexer/storage/in_memory.py
+++ b/swh/indexer/storage/in_memory.py
@@ -14,7 +14,6 @@
     Dict,
     Generic,
     Iterable,
-    Iterator,
     List,
     Optional,
     Set,
@@ -86,7 +85,7 @@
         belong in the unique key."""
         return _key_from_dict({k: d[k] for k in self.row_class.UNIQUE_KEY_FIELDS})
 
-    def missing(self, keys: Iterable[Dict]) -> Iterator[Sha1]:
+    def missing(self, keys: Iterable[Dict]) -> List[Sha1]:
         """List data missing from storage.
 
         Args:
@@ -100,13 +99,15 @@
             missing sha1s
 
         """
+        results = []
         for key in keys:
             tool_id = key["indexer_configuration_id"]
             id_ = key["id"]
             if tool_id not in self._tools_per_id.get(id_, set()):
-                yield id_
+                results.append(id_)
+        return results
 
-    def get(self, ids: Iterable[Sha1]) -> Iterator[TValue]:
+    def get(self, ids: Iterable[Sha1]) -> List[TValue]:
         """Retrieve data per id.
 
         Args:
@@ -120,16 +121,20 @@
              - arbitrary data (as provided to `add`)
 
         """
+        results = []
         for id_ in ids:
             for entry in self._data[id_].values():
                 entry = entry.copy()
                 tool_id = entry.pop("indexer_configuration_id")
-                yield self.row_class(
-                    id=id_, tool=_transform_tool(self._tools[tool_id]), **entry,
+                results.append(
+                    self.row_class(
+                        id=id_, tool=_transform_tool(self._tools[tool_id]), **entry,
+                    )
                 )
+        return results
 
-    def get_all(self) -> Iterator[TValue]:
-        yield from self.get(self._sorted_ids)
+    def get_all(self) -> List[TValue]:
+        return self.get(self._sorted_ids)
 
     def get_partition(
         self,
@@ -259,8 +264,8 @@
 
     def content_mimetype_missing(
         self, mimetypes: Iterable[Dict]
-    ) -> Iterable[Tuple[Sha1, int]]:
-        yield from self._mimetypes.missing(mimetypes)
+    ) -> List[Tuple[Sha1, int]]:
+        return self._mimetypes.missing(mimetypes)
 
     def content_mimetype_get_partition(
         self,
@@ -280,14 +285,14 @@
         added = self._mimetypes.add(mimetypes, conflict_update)
         return {"content_mimetype:add": added}
 
-    def content_mimetype_get(self, ids: Iterable[Sha1]) -> Iterable[ContentMimetypeRow]:
-        yield from self._mimetypes.get(ids)
+    def content_mimetype_get(self, ids: Iterable[Sha1]) -> List[ContentMimetypeRow]:
+        return self._mimetypes.get(ids)
 
     def content_language_missing(self, languages):
-        yield from self._languages.missing(languages)
+        return self._languages.missing(languages)
 
     def content_language_get(self, ids):
-        yield from (obj.to_dict() for obj in self._languages.get(ids))
+        return [obj.to_dict() for obj in self._languages.get(ids)]
 
     def content_language_add(
         self, languages: List[Dict], conflict_update: bool = False
@@ -299,11 +304,13 @@
         return {"content_language:add": added}
 
     def content_ctags_missing(self, ctags):
-        yield from self._content_ctags.missing(ctags)
+        return self._content_ctags.missing(ctags)
 
     def content_ctags_get(self, ids):
+        results = []
         for item in self._content_ctags.get(ids):
-            yield {"id": item.id, "tool": item.tool, **item.to_dict()}
+            results.append({"id": item.id, "tool": item.tool, **item.to_dict()})
+        return results
 
     def content_ctags_add(
         self, ctags: List[Dict], conflict_update: bool = False
@@ -328,6 +335,7 @@
                 (item.id, item.indexer_configuration_id), []
             ).append(item)
 
+        results = []
         for items in items_per_id.values():
             ctags = []
             for item in items:
@@ -343,11 +351,12 @@
 
             if ctags:
                 for ctag in ctags:
-                    yield {"id": id_, "tool": tool, **ctag}
+                    results.append({"id": id_, "tool": tool, **ctag})
+        return results
 
     def content_fossology_license_get(
         self, ids: Iterable[Sha1]
-    ) -> Iterable[ContentLicenseRow]:
+    ) -> List[ContentLicenseRow]:
         return self._licenses.get(ids)
 
     def content_fossology_license_add(
@@ -369,10 +378,10 @@
         )
 
     def content_metadata_missing(self, metadata):
-        yield from self._content_metadata.missing(metadata)
+        return self._content_metadata.missing(metadata)
 
     def content_metadata_get(self, ids):
-        yield from (obj.to_dict() for obj in self._content_metadata.get(ids))
+        return [obj.to_dict() for obj in self._content_metadata.get(ids)]
 
     def content_metadata_add(
         self, metadata: List[Dict], conflict_update: bool = False
@@ -384,10 +393,10 @@
         return {"content_metadata:add": added}
 
     def revision_intrinsic_metadata_missing(self, metadata):
-        yield from self._revision_intrinsic_metadata.missing(metadata)
+        return self._revision_intrinsic_metadata.missing(metadata)
 
     def revision_intrinsic_metadata_get(self, ids):
-        yield from (obj.to_dict() for obj in self._revision_intrinsic_metadata.get(ids))
+        return [obj.to_dict() for obj in self._revision_intrinsic_metadata.get(ids)]
 
     def revision_intrinsic_metadata_add(
         self, metadata: List[Dict], conflict_update: bool = False
@@ -403,7 +412,7 @@
         return {"revision_intrinsic_metadata:del": deleted}
 
     def origin_intrinsic_metadata_get(self, ids):
-        yield from (obj.to_dict() for obj in self._origin_intrinsic_metadata.get(ids))
+        return [obj.to_dict() for obj in self._origin_intrinsic_metadata.get(ids)]
 
     def origin_intrinsic_metadata_add(
         self, metadata: List[Dict], conflict_update: bool = False
@@ -447,8 +456,7 @@
         results.sort(
             key=operator.itemgetter(0), reverse=True  # Don't try to order 'data'
         )
-        for (rank_, result) in results[:limit]:
-            yield result.to_dict()
+        return [result.to_dict() for (rank_, result) in results[:limit]]
 
     def origin_intrinsic_metadata_search_by_producer(
         self, page_token="", limit=100, ids_only=False, mappings=None, tool_ids=None
diff --git a/swh/indexer/storage/interface.py b/swh/indexer/storage/interface.py
--- a/swh/indexer/storage/interface.py
+++ b/swh/indexer/storage/interface.py
@@ -25,7 +25,7 @@
     @remote_api_endpoint("content_mimetype/missing")
     def content_mimetype_missing(
         self, mimetypes: Iterable[Dict]
-    ) -> Iterable[Tuple[Sha1, int]]:
+    ) -> List[Tuple[Sha1, int]]:
         """Generate mimetypes missing from storage.
 
         Args:
@@ -91,7 +91,7 @@
         ...
 
     @remote_api_endpoint("content_mimetype")
-    def content_mimetype_get(self, ids: Iterable[Sha1]) -> Iterable[ContentMimetypeRow]:
+    def content_mimetype_get(self, ids: Iterable[Sha1]) -> List[ContentMimetypeRow]:
         """Retrieve full content mimetype per ids.
 
         Args:
@@ -235,7 +235,7 @@
     @remote_api_endpoint("content/fossology_license")
     def content_fossology_license_get(
         self, ids: Iterable[Sha1]
-    ) -> Iterable[ContentLicenseRow]:
+    ) -> List[ContentLicenseRow]:
         """Retrieve licenses per id.
 
         Args:
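
Note on the caller-visible effect of these hunks: every touched endpoint moves from a generator (`@db_transaction_generator` plus `yield`) to a list-returning method (`@db_transaction` plus a list comprehension or an accumulator). The snippet below is not Software Heritage code; it is a minimal stand-alone sketch, with hypothetical names, of what that change means for callers: results are now materialized before the transaction ends, so they support `len()` and can be iterated more than once, whereas the old generators were lazy and single-use.

from typing import Dict, Iterator, List

ROWS: List[Dict] = [{"id": b"\x01" * 20, "mimetype": "text/plain", "encoding": "us-ascii"}]


def old_style(rows: List[Dict]) -> Iterator[Dict]:
    # Previous behaviour: lazily yield rows one at a time.
    for row in rows:
        yield row


def new_style(rows: List[Dict]) -> List[Dict]:
    # New behaviour: build and return the whole list inside the call.
    return [row for row in rows]


gen = old_style(ROWS)
assert list(gen) == ROWS
assert list(gen) == []  # a generator is exhausted after one pass

lst = new_style(ROWS)
assert len(lst) == 1  # lists support len()
assert [r["mimetype"] for r in lst] == ["text/plain"]
assert [r["mimetype"] for r in lst] == ["text/plain"]  # re-iterable, unlike the generator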