diff --git a/swh/indexer/cli.py b/swh/indexer/cli.py --- a/swh/indexer/cli.py +++ b/swh/indexer/cli.py @@ -3,6 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from typing import Iterator # WARNING: do not import unnecessary things here to keep cli startup time under # control @@ -146,7 +147,7 @@ ctx.obj["scheduler"] = None -def list_origins_by_producer(idx_storage, mappings, tool_ids): +def list_origins_by_producer(idx_storage, mappings, tool_ids) -> Iterator[str]: next_page_token = "" limit = 10000 while next_page_token is not None: @@ -157,8 +158,8 @@ mappings=mappings or None, tool_ids=tool_ids or None, ) - next_page_token = result.get("next_page_token") - yield from result["origins"] + next_page_token = result.next_page_token + yield from result.results @schedule.command("reindex_origin_metadata") diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py --- a/swh/indexer/indexer.py +++ b/swh/indexer/indexer.py @@ -16,7 +16,6 @@ from swh.indexer.storage import INDEXER_CFG_KEY, PagedResult, Sha1, get_indexer_storage from swh.indexer.storage.interface import IndexerStorageInterface from swh.model import hashutil -from swh.model.model import Revision from swh.objstorage.exc import ObjNotFoundError from swh.objstorage.factory import get_objstorage from swh.scheduler import CONFIG as SWH_CONFIG @@ -218,9 +217,7 @@ else: return [] - def index( - self, id: Union[bytes, Dict, Revision], data: Optional[bytes] = None, **kwargs - ) -> List[TResult]: + def index(self, id, data: Optional[bytes] = None, **kwargs) -> List[TResult]: """Index computation for the id and associated raw data. Args: @@ -550,13 +547,13 @@ summary.update(summary_persist) return summary - def index_list(self, origins: List[Any], **kwargs: Any) -> List[TResult]: + def index_list(self, origin_urls: List[str], **kwargs) -> List[TResult]: results = [] - for origin in origins: + for origin_url in origin_urls: try: - results.extend(self.index(origin, **kwargs)) + results.extend(self.index(origin_url, **kwargs)) except Exception: - self.log.exception("Problem when processing origin %s", origin) + self.log.exception("Problem when processing origin %s", origin_url) raise return results diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py --- a/swh/indexer/metadata.py +++ b/swh/indexer/metadata.py @@ -4,7 +4,17 @@ # See top-level LICENSE file for more information from copy import deepcopy -from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple +from typing import ( + Any, + Callable, + Dict, + Iterable, + Iterator, + List, + Optional, + Tuple, + TypeVar, +) from swh.core.config import merge_configs from swh.core.utils import grouper @@ -14,7 +24,11 @@ from swh.indexer.metadata_dictionary import MAPPINGS from swh.indexer.origin_head import OriginHeadIndexer from swh.indexer.storage import INDEXER_CFG_KEY -from swh.indexer.storage.model import ContentMetadataRow, RevisionIntrinsicMetadataRow +from swh.indexer.storage.model import ( + ContentMetadataRow, + OriginIntrinsicMetadataRow, + RevisionIntrinsicMetadataRow, +) from swh.model import hashutil from swh.model.model import Revision @@ -22,11 +36,13 @@ ORIGIN_GET_BATCH_SIZE = 10 +T1 = TypeVar("T1") +T2 = TypeVar("T2") + + def call_with_batches( - f: Callable[[List[Dict[str, Any]]], Dict["str", Any]], - args: List[Dict[str, str]], - batch_size: int, -) -> Iterator[str]: + f: Callable[[List[T1]], Iterable[T2]], args: List[T1], batch_size: int, +) -> Iterator[T2]: """Calls a function with batches of args, and concatenates the results. """ groups = grouper(args, batch_size) @@ -285,7 +301,9 @@ return (used_mappings, metadata) -class OriginMetadataIndexer(OriginIndexer[Tuple[Dict, RevisionIntrinsicMetadataRow]]): +class OriginMetadataIndexer( + OriginIndexer[Tuple[OriginIntrinsicMetadataRow, RevisionIntrinsicMetadataRow]] +): USE_TOOLS = False def __init__(self, config=None, **kwargs) -> None: @@ -293,7 +311,9 @@ self.origin_head_indexer = OriginHeadIndexer(config=config) self.revision_metadata_indexer = RevisionMetadataIndexer(config=config) - def index_list(self, origin_urls, **kwargs): + def index_list( + self, origin_urls: List[str], **kwargs + ) -> List[Tuple[OriginIntrinsicMetadataRow, RevisionIntrinsicMetadataRow]]: head_rev_ids = [] origins_with_head = [] origins = list( @@ -325,31 +345,31 @@ for rev_metadata in self.revision_metadata_indexer.index(rev): # There is at most one rev_metadata - orig_metadata = { - "from_revision": rev_metadata.id, - "id": origin.url, - "metadata": rev_metadata.metadata, - "mappings": rev_metadata.mappings, - "indexer_configuration_id": rev_metadata.indexer_configuration_id, - } + orig_metadata = OriginIntrinsicMetadataRow( + from_revision=rev_metadata.id, + id=origin.url, + metadata=rev_metadata.metadata, + mappings=rev_metadata.mappings, + indexer_configuration_id=rev_metadata.indexer_configuration_id, + ) results.append((orig_metadata, rev_metadata)) return results def persist_index_computations( self, - results: List[Tuple[Dict, RevisionIntrinsicMetadataRow]], + results: List[Tuple[OriginIntrinsicMetadataRow, RevisionIntrinsicMetadataRow]], policy_update: str, ) -> Dict[str, int]: conflict_update = policy_update == "update-dups" # Deduplicate revisions rev_metadata: List[RevisionIntrinsicMetadataRow] = [] - orig_metadata: List[Dict] = [] + orig_metadata: List[OriginIntrinsicMetadataRow] = [] revs_to_delete: List[Dict] = [] origs_to_delete: List[Dict] = [] summary: Dict = {} for (orig_item, rev_item) in results: - assert rev_item.metadata == orig_item["metadata"] + assert rev_item.metadata == orig_item.metadata if not rev_item.metadata or rev_item.metadata.keys() <= {"@context"}: # If we didn't find any metadata, don't store a DB record # (and delete existing ones, if any) @@ -363,7 +383,14 @@ } ) if orig_item not in origs_to_delete: - origs_to_delete.append(orig_item) + origs_to_delete.append( + { + "id": orig_item.id, + "indexer_configuration_id": ( + orig_item.indexer_configuration_id + ), + } + ) else: if rev_item not in rev_metadata: rev_metadata.append(rev_item) diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py --- a/swh/indexer/storage/__init__.py +++ b/swh/indexer/storage/__init__.py @@ -6,7 +6,7 @@ from collections import Counter import json -from typing import Dict, Iterable, List, Optional, Tuple +from typing import Dict, Iterable, List, Optional, Tuple, Union import psycopg2 import psycopg2.pool @@ -551,25 +551,35 @@ @timed @db_transaction() - def origin_intrinsic_metadata_get(self, ids, db=None, cur=None): + def origin_intrinsic_metadata_get( + self, urls: Iterable[str], db=None, cur=None + ) -> List[OriginIntrinsicMetadataRow]: return [ - converters.db_to_metadata(dict(zip(db.origin_intrinsic_metadata_cols, c))) - for c in db.origin_intrinsic_metadata_get_from_list(ids, cur) + OriginIntrinsicMetadataRow.from_dict( + converters.db_to_metadata( + dict(zip(db.origin_intrinsic_metadata_cols, c)) + ) + ) + for c in db.origin_intrinsic_metadata_get_from_list(urls, cur) ] @timed @process_metrics @db_transaction() def origin_intrinsic_metadata_add( - self, metadata: List[Dict], conflict_update: bool = False, db=None, cur=None + self, + metadata: List[OriginIntrinsicMetadataRow], + conflict_update: bool = False, + db=None, + cur=None, ) -> Dict[str, int]: - check_id_duplicates(map(OriginIntrinsicMetadataRow.from_dict, metadata)) - metadata.sort(key=lambda m: m["id"]) + check_id_duplicates(metadata) + metadata.sort(key=lambda m: m.id) db.mktemp_origin_intrinsic_metadata(cur) db.copy_to( - metadata, + [m.to_dict() for m in metadata], "tmp_origin_intrinsic_metadata", ["id", "metadata", "indexer_configuration_id", "from_revision", "mappings"], cur, @@ -593,10 +603,14 @@ @timed @db_transaction() def origin_intrinsic_metadata_search_fulltext( - self, conjunction, limit=100, db=None, cur=None - ): + self, conjunction: List[str], limit: int = 100, db=None, cur=None + ) -> List[OriginIntrinsicMetadataRow]: return [ - converters.db_to_metadata(dict(zip(db.origin_intrinsic_metadata_cols, c))) + OriginIntrinsicMetadataRow.from_dict( + converters.db_to_metadata( + dict(zip(db.origin_intrinsic_metadata_cols, c)) + ) + ) for c in db.origin_intrinsic_metadata_search_fulltext( conjunction, limit=limit, cur=cur ) @@ -606,37 +620,40 @@ @db_transaction() def origin_intrinsic_metadata_search_by_producer( self, - page_token="", - limit=100, - ids_only=False, - mappings=None, - tool_ids=None, + page_token: str = "", + limit: int = 100, + ids_only: bool = False, + mappings: Optional[List[str]] = None, + tool_ids: Optional[List[int]] = None, db=None, cur=None, - ): + ) -> PagedResult[Union[str, OriginIntrinsicMetadataRow]]: assert isinstance(page_token, str) # we go to limit+1 to check whether we should add next_page_token in # the response - res = db.origin_intrinsic_metadata_search_by_producer( + rows = db.origin_intrinsic_metadata_search_by_producer( page_token, limit + 1, ids_only, mappings, tool_ids, cur ) - result = {} + next_page_token = None if ids_only: - result["origins"] = [origin for (origin,) in res] - if len(result["origins"]) > limit: - result["origins"][limit:] = [] - result["next_page_token"] = result["origins"][-1] + results = [origin for (origin,) in rows] + if len(results) > limit: + results[limit:] = [] + next_page_token = results[-1] else: - result["origins"] = [ - converters.db_to_metadata( - dict(zip(db.origin_intrinsic_metadata_cols, c)) + results = [ + OriginIntrinsicMetadataRow.from_dict( + converters.db_to_metadata( + dict(zip(db.origin_intrinsic_metadata_cols, row)) + ) ) - for c in res + for row in rows ] - if len(result["origins"]) > limit: - result["origins"][limit:] = [] - result["next_page_token"] = result["origins"][-1]["id"] - return result + if len(results) > limit: + results[limit:] = [] + next_page_token = results[-1].id + + return PagedResult(results=results, next_page_token=next_page_token,) @timed @db_transaction() diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py --- a/swh/indexer/storage/in_memory.py +++ b/swh/indexer/storage/in_memory.py @@ -20,6 +20,7 @@ Tuple, Type, TypeVar, + Union, ) from swh.core.collections import SortedList @@ -397,22 +398,24 @@ deleted = self._revision_intrinsic_metadata.delete(entries) return {"revision_intrinsic_metadata:del": deleted} - def origin_intrinsic_metadata_get(self, ids): - return [obj.to_dict() for obj in self._origin_intrinsic_metadata.get(ids)] + def origin_intrinsic_metadata_get( + self, urls: Iterable[str] + ) -> List[OriginIntrinsicMetadataRow]: + return self._origin_intrinsic_metadata.get(urls) def origin_intrinsic_metadata_add( - self, metadata: List[Dict], conflict_update: bool = False + self, metadata: List[OriginIntrinsicMetadataRow], conflict_update: bool = False ) -> Dict[str, int]: - added = self._origin_intrinsic_metadata.add( - map(OriginIntrinsicMetadataRow.from_dict, metadata), conflict_update - ) + added = self._origin_intrinsic_metadata.add(metadata, conflict_update) return {"origin_intrinsic_metadata:add": added} def origin_intrinsic_metadata_delete(self, entries: List[Dict]) -> Dict: deleted = self._origin_intrinsic_metadata.delete(entries) return {"origin_intrinsic_metadata:del": deleted} - def origin_intrinsic_metadata_search_fulltext(self, conjunction, limit=100): + def origin_intrinsic_metadata_search_fulltext( + self, conjunction: List[str], limit: int = 100 + ) -> List[OriginIntrinsicMetadataRow]: # A very crude fulltext search implementation, but that's enough # to work on English metadata tokens_re = re.compile("[a-zA-Z0-9]+") @@ -442,18 +445,23 @@ results.sort( key=operator.itemgetter(0), reverse=True # Don't try to order 'data' ) - return [result.to_dict() for (rank_, result) in results[:limit]] + return [result for (rank_, result) in results[:limit]] def origin_intrinsic_metadata_search_by_producer( - self, page_token="", limit=100, ids_only=False, mappings=None, tool_ids=None - ): + self, + page_token: str = "", + limit: int = 100, + ids_only: bool = False, + mappings: Optional[List[str]] = None, + tool_ids: Optional[List[int]] = None, + ) -> PagedResult[Union[str, OriginIntrinsicMetadataRow]]: assert isinstance(page_token, str) nb_results = 0 if mappings is not None: - mappings = frozenset(mappings) + mapping_set = frozenset(mappings) if tool_ids is not None: - tool_ids = frozenset(tool_ids) - origins = [] + tool_id_set = frozenset(tool_ids) + rows = [] # we go to limit+1 to check whether we should add next_page_token in # the response @@ -462,21 +470,21 @@ continue if nb_results >= (limit + 1): break - if mappings is not None and mappings.isdisjoint(entry.mappings): + if mappings and mapping_set.isdisjoint(entry.mappings): continue - if tool_ids is not None and entry.tool["id"] not in tool_ids: + if tool_ids and entry.tool["id"] not in tool_id_set: continue - origins.append(entry.to_dict()) + rows.append(entry) nb_results += 1 - result = {} - if len(origins) > limit: - origins = origins[:limit] - result["next_page_token"] = origins[-1]["id"] + if len(rows) > limit: + rows = rows[:limit] + next_page_token = rows[-1].id + else: + next_page_token = None if ids_only: - origins = [origin["id"] for origin in origins] - result["origins"] = origins - return result + rows = [row.id for row in rows] + return PagedResult(results=rows, next_page_token=next_page_token,) def origin_intrinsic_metadata_stats(self): mapping_count = {m: 0 for m in MAPPING_NAMES} diff --git a/swh/indexer/storage/interface.py b/swh/indexer/storage/interface.py --- a/swh/indexer/storage/interface.py +++ b/swh/indexer/storage/interface.py @@ -3,7 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import Dict, Iterable, List, Optional, Tuple, TypeVar +from typing import Dict, Iterable, List, Optional, Tuple, TypeVar, Union from swh.core.api import remote_api_endpoint from swh.core.api.classes import PagedResult as CorePagedResult @@ -13,6 +13,7 @@ ContentLicenseRow, ContentMetadataRow, ContentMimetypeRow, + OriginIntrinsicMetadataRow, RevisionIntrinsicMetadataRow, ) @@ -421,42 +422,26 @@ ... @remote_api_endpoint("origin_intrinsic_metadata") - def origin_intrinsic_metadata_get(self, ids): + def origin_intrinsic_metadata_get( + self, urls: Iterable[str] + ) -> List[OriginIntrinsicMetadataRow]: """Retrieve origin metadata per id. Args: - ids (iterable): origin identifiers - - Yields: - list: dictionaries with the following keys: - - - **id** (str): origin url - - **from_revision** (bytes): which revision this metadata - was extracted from - - **metadata** (str): associated metadata - - **tool** (dict): tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata + urls (iterable): origin URLs + Returns: list of OriginIntrinsicMetadataRow """ ... @remote_api_endpoint("origin_intrinsic_metadata/add") def origin_intrinsic_metadata_add( - self, metadata: List[Dict], conflict_update: bool = False + self, metadata: List[OriginIntrinsicMetadataRow], conflict_update: bool = False ) -> Dict[str, int]: """Add origin metadata not present in storage. Args: - metadata (iterable): dictionaries with keys: - - - **id**: origin urls - - **from_revision**: sha1 id of the revision used to generate - these metadata. - - **metadata**: arbitrary dict - - **indexer_configuration_id**: tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata + metadata: list of OriginIntrinsicMetadataRow objects conflict_update: Flag to determine if we want to overwrite (true) or skip duplicates (false, the default) @@ -484,31 +469,30 @@ ... @remote_api_endpoint("origin_intrinsic_metadata/search/fulltext") - def origin_intrinsic_metadata_search_fulltext(self, conjunction, limit=100): + def origin_intrinsic_metadata_search_fulltext( + self, conjunction: List[str], limit: int = 100 + ) -> List[OriginIntrinsicMetadataRow]: """Returns the list of origins whose metadata contain all the terms. Args: - conjunction (List[str]): List of terms to be searched for. - limit (int): The maximum number of results to return + conjunction: List of terms to be searched for. + limit: The maximum number of results to return - Yields: - list: dictionaries with the following keys: - - - **id** (str): origin urls - - **from_revision**: sha1 id of the revision used to generate - these metadata. - - **metadata** (str): associated metadata - - **tool** (dict): tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata + Returns: + list of OriginIntrinsicMetadataRow """ ... @remote_api_endpoint("origin_intrinsic_metadata/search/by_producer") def origin_intrinsic_metadata_search_by_producer( - self, page_token="", limit=100, ids_only=False, mappings=None, tool_ids=None - ): + self, + page_token: str = "", + limit: int = 100, + ids_only: bool = False, + mappings: Optional[List[str]] = None, + tool_ids: Optional[List[int]] = None, + ) -> PagedResult[Union[str, OriginIntrinsicMetadataRow]]: """Returns the list of origins whose metadata contain all the terms. Args: @@ -520,20 +504,7 @@ were generated using at least one of these mappings. Returns: - dict: dict with the following keys: - - **next_page_token** (str, optional): opaque token to be used as - `page_token` for retrieving the next page. If absent, there is - no more pages to gather. - - **origins** (list): list of origin url (str) if `ids_only=True` - else dictionaries with the following keys: - - - **id** (str): origin urls - - **from_revision**: sha1 id of the revision used to generate - these metadata. - - **metadata** (str): associated metadata - - **tool** (dict): tool used to compute metadata - - **mappings** (List[str]): list of mappings used to translate - these metadata + OriginIntrinsicMetadataRow objects """ ... diff --git a/swh/indexer/storage/model.py b/swh/indexer/storage/model.py --- a/swh/indexer/storage/model.py +++ b/swh/indexer/storage/model.py @@ -21,7 +21,7 @@ class BaseRow: UNIQUE_KEY_FIELDS: Tuple = ("id", "indexer_configuration_id") - id = attr.ib(type=Sha1Git) + id = attr.ib(type=Any) indexer_configuration_id = attr.ib(type=Optional[int], default=None, kw_only=True) tool = attr.ib(type=Optional[Dict], default=None, kw_only=True) @@ -64,12 +64,14 @@ @attr.s class ContentMimetypeRow(BaseRow): + id = attr.ib(type=Sha1Git) mimetype = attr.ib(type=str) encoding = attr.ib(type=str) @attr.s class ContentLanguageRow(BaseRow): + id = attr.ib(type=Sha1Git) lang = attr.ib(type=str) @@ -84,6 +86,7 @@ "lang", ) + id = attr.ib(type=Sha1Git) name = attr.ib(type=str) kind = attr.ib(type=str) line = attr.ib(type=int) @@ -94,22 +97,26 @@ class ContentLicenseRow(BaseRow): UNIQUE_KEY_FIELDS = ("id", "indexer_configuration_id", "license") + id = attr.ib(type=Sha1Git) license = attr.ib(type=str) @attr.s class ContentMetadataRow(BaseRow): + id = attr.ib(type=Sha1Git) metadata = attr.ib(type=Dict[str, Any]) @attr.s class RevisionIntrinsicMetadataRow(BaseRow): + id = attr.ib(type=Sha1Git) metadata = attr.ib(type=Dict[str, Any]) mappings = attr.ib(type=List[str]) @attr.s class OriginIntrinsicMetadataRow(BaseRow): + id = attr.ib(type=str) metadata = attr.ib(type=Dict[str, Any]) from_revision = attr.ib(type=Sha1Git) mappings = attr.ib(type=List[str]) diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py --- a/swh/indexer/tests/storage/test_storage.py +++ b/swh/indexer/tests/storage/test_storage.py @@ -6,13 +6,13 @@ import inspect import math import threading -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Tuple, Union, cast import attr import pytest from swh.indexer.storage.exc import DuplicateId, IndexerStorageArgumentException -from swh.indexer.storage.interface import IndexerStorageInterface +from swh.indexer.storage.interface import IndexerStorageInterface, PagedResult from swh.indexer.storage.model import ( BaseRow, ContentCtagsRow, @@ -20,6 +20,7 @@ ContentLicenseRow, ContentMetadataRow, ContentMimetypeRow, + OriginIntrinsicMetadataRow, RevisionIntrinsicMetadataRow, ) from swh.model.hashutil import hash_to_bytes @@ -1148,13 +1149,13 @@ mappings=["mapping1"], indexer_configuration_id=tool_id, ) - metadata_origin = { - "id": data.origin_url_1, - "metadata": metadata, - "indexer_configuration_id": tool_id, - "mappings": ["mapping1"], - "from_revision": data.revision_id_2, - } + metadata_origin = OriginIntrinsicMetadataRow( + id=data.origin_url_1, + metadata=metadata, + indexer_configuration_id=tool_id, + mappings=["mapping1"], + from_revision=data.revision_id_2, + ) # when storage.revision_intrinsic_metadata_add([metadata_rev]) @@ -1166,13 +1167,13 @@ ) expected_metadata = [ - { - "id": data.origin_url_1, - "metadata": metadata, - "tool": data.tools["swh-metadata-detector"], - "from_revision": data.revision_id_2, - "mappings": ["mapping1"], - } + OriginIntrinsicMetadataRow( + id=data.origin_url_1, + metadata=metadata, + tool=data.tools["swh-metadata-detector"], + from_revision=data.revision_id_2, + mappings=["mapping1"], + ) ] assert actual_metadata == expected_metadata @@ -1194,15 +1195,14 @@ metadata=metadata, mappings=["mapping1"], ) - metadata_origin = { - "id": data.origin_url_1, - "metadata": metadata, - "indexer_configuration_id": tool_id, - "mappings": ["mapping1"], - "from_revision": data.revision_id_2, - } - metadata_origin2 = metadata_origin.copy() - metadata_origin2["id"] = data.origin_url_2 + metadata_origin = OriginIntrinsicMetadataRow( + id=data.origin_url_1, + metadata=metadata, + indexer_configuration_id=tool_id, + mappings=["mapping1"], + from_revision=data.revision_id_2, + ) + metadata_origin2 = attr.evolve(metadata_origin, id=data.origin_url_2) # when storage.revision_intrinsic_metadata_add([metadata_rev]) @@ -1218,9 +1218,10 @@ [data.origin_url_1, data.origin_url_2, "no://where"] ) ) - for item in actual_metadata: - item["indexer_configuration_id"] = item.pop("tool")["id"] - assert actual_metadata == [metadata_origin2] + assert [ + attr.evolve(m, indexer_configuration_id=cast(Dict, m.tool)["id"], tool=None) + for m in actual_metadata + ] == [metadata_origin2] def test_origin_intrinsic_metadata_delete_nonexisting( self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] @@ -1248,13 +1249,13 @@ mappings=[], indexer_configuration_id=tool_id, ) - metadata_origin_v1 = { - "id": data.origin_url_1, - "metadata": metadata_v1.copy(), - "indexer_configuration_id": tool_id, - "mappings": [], - "from_revision": data.revision_id_1, - } + metadata_origin_v1 = OriginIntrinsicMetadataRow( + id=data.origin_url_1, + metadata=metadata_v1.copy(), + indexer_configuration_id=tool_id, + mappings=[], + from_revision=data.revision_id_1, + ) # given storage.revision_intrinsic_metadata_add([metadata_rev_v1]) @@ -1266,13 +1267,13 @@ ) expected_metadata_v1 = [ - { - "id": data.origin_url_1, - "metadata": metadata_v1, - "tool": data.tools["swh-metadata-detector"], - "from_revision": data.revision_id_1, - "mappings": [], - } + OriginIntrinsicMetadataRow( + id=data.origin_url_1, + metadata=metadata_v1, + tool=data.tools["swh-metadata-detector"], + from_revision=data.revision_id_1, + mappings=[], + ) ] assert actual_metadata == expected_metadata_v1 @@ -1283,8 +1284,7 @@ {"name": "test_metadata", "author": "MG",} ) metadata_rev_v2 = attr.evolve(metadata_rev_v1, metadata=metadata_v2) - metadata_origin_v2 = metadata_origin_v1.copy() - metadata_origin_v2["metadata"] = metadata_v2 + metadata_origin_v2 = attr.evolve(metadata_origin_v1, metadata=metadata_v2) storage.revision_intrinsic_metadata_add([metadata_rev_v2]) storage.origin_intrinsic_metadata_add([metadata_origin_v2]) @@ -1314,13 +1314,13 @@ mappings=[], indexer_configuration_id=tool_id, ) - metadata_origin_v1 = { - "id": data.origin_url_1, - "metadata": metadata_v1.copy(), - "indexer_configuration_id": tool_id, - "mappings": [], - "from_revision": data.revision_id_2, - } + metadata_origin_v1 = OriginIntrinsicMetadataRow( + id=data.origin_url_1, + metadata=metadata_v1.copy(), + indexer_configuration_id=tool_id, + mappings=[], + from_revision=data.revision_id_2, + ) # given storage.revision_intrinsic_metadata_add([metadata_rev_v1]) @@ -1333,13 +1333,13 @@ # then expected_metadata_v1 = [ - { - "id": data.origin_url_1, - "metadata": metadata_v1, - "tool": data.tools["swh-metadata-detector"], - "from_revision": data.revision_id_2, - "mappings": [], - } + OriginIntrinsicMetadataRow( + id=data.origin_url_1, + metadata=metadata_v1, + tool=data.tools["swh-metadata-detector"], + from_revision=data.revision_id_2, + mappings=[], + ) ] assert actual_metadata == expected_metadata_v1 @@ -1349,14 +1349,13 @@ {"name": "test_update_duplicated_metadata", "author": "MG",} ) metadata_rev_v2 = attr.evolve(metadata_rev_v1, metadata=metadata_v2) - metadata_origin_v2 = metadata_origin_v1.copy() - metadata_origin_v2 = { - "id": data.origin_url_1, - "metadata": metadata_v2.copy(), - "indexer_configuration_id": tool_id, - "mappings": ["npm"], - "from_revision": data.revision_id_1, - } + metadata_origin_v2 = OriginIntrinsicMetadataRow( + id=data.origin_url_1, + metadata=metadata_v2.copy(), + indexer_configuration_id=tool_id, + mappings=["npm"], + from_revision=data.revision_id_1, + ) storage.revision_intrinsic_metadata_add([metadata_rev_v2], conflict_update=True) storage.origin_intrinsic_metadata_add( @@ -1368,13 +1367,13 @@ ) expected_metadata_v2 = [ - { - "id": data.origin_url_1, - "metadata": metadata_v2, - "tool": data.tools["swh-metadata-detector"], - "from_revision": data.revision_id_1, - "mappings": ["npm"], - } + OriginIntrinsicMetadataRow( + id=data.origin_url_1, + metadata=metadata_v2, + tool=data.tools["swh-metadata-detector"], + from_revision=data.revision_id_1, + mappings=["npm"], + ) ] # metadata did change as the v2 was used to overwrite v1 @@ -1389,11 +1388,11 @@ ids = list(range(10)) - example_data1 = { + example_data1: Dict[str, Any] = { "metadata": {"version": None, "name": None,}, "mappings": [], } - example_data2 = { + example_data2: Dict[str, Any] = { "metadata": {"version": "v1.1.1", "name": "foo",}, "mappings": [], } @@ -1406,21 +1405,21 @@ ) data_v1 = [ - { - "id": "file:///tmp/origin%d" % id_, - "from_revision": data.revision_id_2, + OriginIntrinsicMetadataRow( + id="file:///tmp/origin%d" % id_, + from_revision=data.revision_id_2, + indexer_configuration_id=tool_id, **example_data1, - "indexer_configuration_id": tool_id, - } + ) for id_ in ids ] data_v2 = [ - { - "id": "file:///tmp/origin%d" % id_, - "from_revision": data.revision_id_2, + OriginIntrinsicMetadataRow( + id="file:///tmp/origin%d" % id_, + from_revision=data.revision_id_2, + indexer_configuration_id=tool_id, **example_data2, - "indexer_configuration_id": tool_id, - } + ) for id_ in ids ] @@ -1438,12 +1437,12 @@ actual_data = list(storage.origin_intrinsic_metadata_get(origins)) expected_data_v1 = [ - { - "id": "file:///tmp/origin%d" % id_, - "from_revision": data.revision_id_2, + OriginIntrinsicMetadataRow( + id="file:///tmp/origin%d" % id_, + from_revision=data.revision_id_2, + tool=data.tools["swh-metadata-detector"], **example_data1, - "tool": data.tools["swh-metadata-detector"], - } + ) for id_ in ids ] @@ -1468,17 +1467,17 @@ actual_data = list(storage.origin_intrinsic_metadata_get(origins)) expected_data_v2 = [ - { - "id": "file:///tmp/origin%d" % id_, - "from_revision": data.revision_id_2, + OriginIntrinsicMetadataRow( + id="file:///tmp/origin%d" % id_, + from_revision=data.revision_id_2, + tool=data.tools["swh-metadata-detector"], **example_data2, - "tool": data.tools["swh-metadata-detector"], - } + ) for id_ in ids ] assert len(actual_data) == len(expected_data_v2) - assert sorted(actual_data, key=lambda x: x["id"]) == expected_data_v2 + assert sorted(actual_data, key=lambda x: x.id) == expected_data_v2 def test_origin_intrinsic_metadata_add__duplicate_twice( self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] @@ -1497,13 +1496,13 @@ mappings=["mapping1"], indexer_configuration_id=tool_id, ) - metadata_origin = { - "id": data.origin_url_1, - "metadata": metadata, - "indexer_configuration_id": tool_id, - "mappings": ["mapping1"], - "from_revision": data.revision_id_2, - } + metadata_origin = OriginIntrinsicMetadataRow( + id=data.origin_url_1, + metadata=metadata, + indexer_configuration_id=tool_id, + mappings=["mapping1"], + from_revision=data.revision_id_2, + ) # when storage.revision_intrinsic_metadata_add([metadata_rev]) @@ -1527,13 +1526,13 @@ mappings=[], indexer_configuration_id=tool_id, ) - metadata1_origin = { - "id": data.origin_url_1, - "metadata": metadata1, - "mappings": [], - "indexer_configuration_id": tool_id, - "from_revision": data.revision_id_1, - } + metadata1_origin = OriginIntrinsicMetadataRow( + id=data.origin_url_1, + metadata=metadata1, + mappings=[], + indexer_configuration_id=tool_id, + from_revision=data.revision_id_1, + ) metadata2 = { "author": "Jane Doe", } @@ -1543,13 +1542,13 @@ mappings=[], indexer_configuration_id=tool_id, ) - metadata2_origin = { - "id": data.origin_url_2, - "metadata": metadata2, - "mappings": [], - "indexer_configuration_id": tool_id, - "from_revision": data.revision_id_2, - } + metadata2_origin = OriginIntrinsicMetadataRow( + id=data.origin_url_2, + metadata=metadata2, + mappings=[], + indexer_configuration_id=tool_id, + from_revision=data.revision_id_2, + ) # when storage.revision_intrinsic_metadata_add([metadata1_rev]) @@ -1559,11 +1558,11 @@ # then search = storage.origin_intrinsic_metadata_search_fulltext - assert set([res["id"] for res in search(["Doe"])]) == set( + assert set([res.id for res in search(["Doe"])]) == set( [data.origin_url_1, data.origin_url_2] ) - assert [res["id"] for res in search(["John", "Doe"])] == [data.origin_url_1] - assert [res["id"] for res in search(["John"])] == [data.origin_url_1] + assert [res.id for res in search(["John", "Doe"])] == [data.origin_url_1] + assert [res.id for res in search(["John"])] == [data.origin_url_1] assert not list(search(["John", "Jane"])) def test_origin_intrinsic_metadata_search_fulltext_rank( @@ -1584,13 +1583,13 @@ mappings=[], indexer_configuration_id=tool_id, ) - metadata1_origin = { - "id": data.origin_url_1, - "metadata": metadata1, - "mappings": [], - "indexer_configuration_id": tool_id, - "from_revision": data.revision_id_1, - } + metadata1_origin = OriginIntrinsicMetadataRow( + id=data.origin_url_1, + metadata=metadata1, + mappings=[], + indexer_configuration_id=tool_id, + from_revision=data.revision_id_1, + ) metadata2 = {"author": ["Random Person", "Jane Doe",]} metadata2_rev = RevisionIntrinsicMetadataRow( id=data.revision_id_2, @@ -1598,13 +1597,13 @@ mappings=[], indexer_configuration_id=tool_id, ) - metadata2_origin = { - "id": data.origin_url_2, - "metadata": metadata2, - "mappings": [], - "indexer_configuration_id": tool_id, - "from_revision": data.revision_id_2, - } + metadata2_origin = OriginIntrinsicMetadataRow( + id=data.origin_url_2, + metadata=metadata2, + mappings=[], + indexer_configuration_id=tool_id, + from_revision=data.revision_id_2, + ) # when storage.revision_intrinsic_metadata_add([metadata1_rev]) @@ -1614,17 +1613,17 @@ # then search = storage.origin_intrinsic_metadata_search_fulltext - assert [res["id"] for res in search(["Doe"])] == [ + assert [res.id for res in search(["Doe"])] == [ data.origin_url_1, data.origin_url_2, ] - assert [res["id"] for res in search(["Doe"], limit=1)] == [data.origin_url_1] - assert [res["id"] for res in search(["John"])] == [data.origin_url_1] - assert [res["id"] for res in search(["Jane"])] == [ + assert [res.id for res in search(["Doe"], limit=1)] == [data.origin_url_1] + assert [res.id for res in search(["John"])] == [data.origin_url_1] + assert [res.id for res in search(["Jane"])] == [ data.origin_url_2, data.origin_url_1, ] - assert [res["id"] for res in search(["John", "Jane"])] == [data.origin_url_1] + assert [res.id for res in search(["John", "Jane"])] == [data.origin_url_1] def _fill_origin_intrinsic_metadata( self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] @@ -1643,13 +1642,13 @@ mappings=["npm"], indexer_configuration_id=tool1_id, ) - metadata1_origin = { - "id": data.origin_url_1, - "metadata": metadata1, - "mappings": ["npm"], - "indexer_configuration_id": tool1_id, - "from_revision": data.revision_id_1, - } + metadata1_origin = OriginIntrinsicMetadataRow( + id=data.origin_url_1, + metadata=metadata1, + mappings=["npm"], + indexer_configuration_id=tool1_id, + from_revision=data.revision_id_1, + ) metadata2 = { "@context": "foo", "author": "Jane Doe", @@ -1660,13 +1659,13 @@ mappings=["npm", "gemspec"], indexer_configuration_id=tool2_id, ) - metadata2_origin = { - "id": data.origin_url_2, - "metadata": metadata2, - "mappings": ["npm", "gemspec"], - "indexer_configuration_id": tool2_id, - "from_revision": data.revision_id_2, - } + metadata2_origin = OriginIntrinsicMetadataRow( + id=data.origin_url_2, + metadata=metadata2, + mappings=["npm", "gemspec"], + indexer_configuration_id=tool2_id, + from_revision=data.revision_id_2, + ) metadata3 = { "@context": "foo", } @@ -1676,13 +1675,13 @@ mappings=["npm", "gemspec"], indexer_configuration_id=tool2_id, ) - metadata3_origin = { - "id": data.origin_url_3, - "metadata": metadata3, - "mappings": ["pkg-info"], - "indexer_configuration_id": tool2_id, - "from_revision": data.revision_id_3, - } + metadata3_origin = OriginIntrinsicMetadataRow( + id=data.origin_url_3, + metadata=metadata3, + mappings=["pkg-info"], + indexer_configuration_id=tool2_id, + from_revision=data.revision_id_3, + ) storage.revision_intrinsic_metadata_add([metadata1_rev]) storage.origin_intrinsic_metadata_add([metadata1_origin]) @@ -1703,93 +1702,92 @@ # test pagination # no 'page_token' param, return all origins result = endpoint(ids_only=True) - assert result["origins"] == [ - data.origin_url_1, - data.origin_url_2, - data.origin_url_3, - ] - assert "next_page_token" not in result + assert result == PagedResult( + results=[data.origin_url_1, data.origin_url_2, data.origin_url_3,], + next_page_token=None, + ) # 'page_token' is < than origin_1, return everything result = endpoint(page_token=data.origin_url_1[:-1], ids_only=True) - assert result["origins"] == [ - data.origin_url_1, - data.origin_url_2, - data.origin_url_3, - ] - assert "next_page_token" not in result + assert result == PagedResult( + results=[data.origin_url_1, data.origin_url_2, data.origin_url_3,], + next_page_token=None, + ) # 'page_token' is origin_3, return nothing result = endpoint(page_token=data.origin_url_3, ids_only=True) - assert not result["origins"] - assert "next_page_token" not in result + assert result == PagedResult(results=[], next_page_token=None) # test limit argument result = endpoint(page_token=data.origin_url_1[:-1], limit=2, ids_only=True) - assert result["origins"] == [data.origin_url_1, data.origin_url_2] - assert result["next_page_token"] == result["origins"][-1] + assert result == PagedResult( + results=[data.origin_url_1, data.origin_url_2], + next_page_token=data.origin_url_2, + ) result = endpoint(page_token=data.origin_url_1, limit=2, ids_only=True) - assert result["origins"] == [data.origin_url_2, data.origin_url_3] - assert "next_page_token" not in result + assert result == PagedResult( + results=[data.origin_url_2, data.origin_url_3], next_page_token=None, + ) result = endpoint(page_token=data.origin_url_2, limit=2, ids_only=True) - assert result["origins"] == [data.origin_url_3] - assert "next_page_token" not in result + assert result == PagedResult(results=[data.origin_url_3], next_page_token=None,) # test mappings filtering result = endpoint(mappings=["npm"], ids_only=True) - assert result["origins"] == [data.origin_url_1, data.origin_url_2] - assert "next_page_token" not in result + assert result == PagedResult( + results=[data.origin_url_1, data.origin_url_2], next_page_token=None, + ) result = endpoint(mappings=["npm", "gemspec"], ids_only=True) - assert result["origins"] == [data.origin_url_1, data.origin_url_2] - assert "next_page_token" not in result + assert result == PagedResult( + results=[data.origin_url_1, data.origin_url_2], next_page_token=None, + ) result = endpoint(mappings=["gemspec"], ids_only=True) - assert result["origins"] == [data.origin_url_2] - assert "next_page_token" not in result + assert result == PagedResult(results=[data.origin_url_2], next_page_token=None,) result = endpoint(mappings=["pkg-info"], ids_only=True) - assert result["origins"] == [data.origin_url_3] - assert "next_page_token" not in result + assert result == PagedResult(results=[data.origin_url_3], next_page_token=None,) result = endpoint(mappings=["foobar"], ids_only=True) - assert not result["origins"] - assert "next_page_token" not in result + assert result == PagedResult(results=[], next_page_token=None,) # test pagination + mappings result = endpoint(mappings=["npm"], limit=1, ids_only=True) - assert result["origins"] == [data.origin_url_1] - assert result["next_page_token"] == result["origins"][-1] + assert result == PagedResult( + results=[data.origin_url_1], next_page_token=data.origin_url_1, + ) # test tool filtering result = endpoint(tool_ids=[tool1["id"]], ids_only=True) - assert result["origins"] == [data.origin_url_1] - assert "next_page_token" not in result + assert result == PagedResult(results=[data.origin_url_1], next_page_token=None,) result = endpoint(tool_ids=[tool2["id"]], ids_only=True) - assert sorted(result["origins"]) == [data.origin_url_2, data.origin_url_3] - assert "next_page_token" not in result + assert sorted(result.results) == [data.origin_url_2, data.origin_url_3] + assert result.next_page_token is None result = endpoint(tool_ids=[tool1["id"], tool2["id"]], ids_only=True) - assert sorted(result["origins"]) == [ + assert sorted(result.results) == [ data.origin_url_1, data.origin_url_2, data.origin_url_3, ] - assert "next_page_token" not in result + assert result.next_page_token is None # test ids_only=False - assert endpoint(mappings=["gemspec"])["origins"] == [ - { - "id": data.origin_url_2, - "metadata": {"@context": "foo", "author": "Jane Doe",}, - "mappings": ["npm", "gemspec"], - "tool": tool2, - "from_revision": data.revision_id_2, - } - ] + assert endpoint(mappings=["gemspec"]) == PagedResult( + results=[ + OriginIntrinsicMetadataRow( + id=data.origin_url_2, + metadata={"@context": "foo", "author": "Jane Doe",}, + mappings=["npm", "gemspec"], + tool=tool2, + from_revision=data.revision_id_2, + ) + ], + next_page_token=None, + ) def test_origin_intrinsic_metadata_stats( self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py --- a/swh/indexer/tests/test_cli.py +++ b/swh/indexer/tests/test_cli.py @@ -14,7 +14,10 @@ from swh.indexer.cli import indexer_cli_group from swh.indexer.storage.interface import IndexerStorageInterface -from swh.indexer.storage.model import RevisionIntrinsicMetadataRow +from swh.indexer.storage.model import ( + OriginIntrinsicMetadataRow, + RevisionIntrinsicMetadataRow, +) from swh.journal.serializers import value_to_kafka from swh.model.hashutil import hash_to_bytes @@ -38,13 +41,13 @@ tools = idx_storage.indexer_configuration_add(tools) origin_metadata = [ - { - "id": "file://dev/%04d" % origin_id, - "from_revision": hash_to_bytes("abcd{:0>4}".format(origin_id)), - "indexer_configuration_id": tools[origin_id % 2]["id"], - "metadata": {"name": "origin %d" % origin_id}, - "mappings": ["mapping%d" % (origin_id % 10)], - } + OriginIntrinsicMetadataRow( + id="file://dev/%04d" % origin_id, + from_revision=hash_to_bytes("abcd{:0>4}".format(origin_id)), + indexer_configuration_id=tools[origin_id % 2]["id"], + metadata={"name": "origin %d" % origin_id}, + mappings=["mapping%d" % (origin_id % 10)], + ) for origin_id in range(nb_rows) ] revision_metadata = [ diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py --- a/swh/indexer/tests/test_origin_metadata.py +++ b/swh/indexer/tests/test_origin_metadata.py @@ -7,7 +7,10 @@ from swh.indexer.metadata import OriginMetadataIndexer from swh.indexer.storage.interface import IndexerStorageInterface -from swh.indexer.storage.model import RevisionIntrinsicMetadataRow +from swh.indexer.storage.model import ( + OriginIntrinsicMetadataRow, + RevisionIntrinsicMetadataRow, +) from swh.model.model import Origin from swh.storage.interface import StorageInterface @@ -33,13 +36,13 @@ rev_metadata = RevisionIntrinsicMetadataRow( id=rev_id, tool=tool, metadata=YARN_PARSER_METADATA, mappings=["npm"], ) - origin_metadata = { - "id": origin, - "tool": tool, - "from_revision": rev_id, - "metadata": YARN_PARSER_METADATA, - "mappings": ["npm"], - } + origin_metadata = OriginIntrinsicMetadataRow( + id=origin, + tool=tool, + from_revision=rev_id, + metadata=YARN_PARSER_METADATA, + mappings=["npm"], + ) rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) for rev_result in rev_results: @@ -49,7 +52,8 @@ orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) for orig_result in orig_results: - del orig_result["tool"]["id"] + assert orig_result.tool + del orig_result.tool["id"] assert orig_results == [origin_metadata] @@ -65,11 +69,11 @@ origin = "https://github.com/librariesio/yarn-parser" rev_id = REVISION.id - results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) - assert len(results) == 1 + rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) + assert len(rev_results) == 1 - results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) - assert len(results) == 1 + orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) + assert len(orig_results) == 1 def test_origin_metadata_indexer_missing_head( @@ -112,14 +116,14 @@ indexer.idx_storage.origin_intrinsic_metadata_get([origin1, origin2]) ) for orig_result in orig_results: - del orig_result["tool"] assert orig_results == [ - { - "id": origin2, - "from_revision": rev_id, - "metadata": YARN_PARSER_METADATA, - "mappings": ["npm"], - } + OriginIntrinsicMetadataRow( + id=origin2, + from_revision=rev_id, + metadata=YARN_PARSER_METADATA, + mappings=["npm"], + tool=orig_results[0].tool, + ) ] @@ -136,13 +140,13 @@ rev_id = REVISION.id - results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) - assert len(results) == 1 + rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) + assert len(rev_results) == 1 - results = list( + orig_results = list( indexer.idx_storage.origin_intrinsic_metadata_get([origin1, origin2]) ) - assert len(results) == 2 + assert len(orig_results) == 2 def test_origin_metadata_indexer_no_metadata_file( @@ -156,11 +160,11 @@ rev_id = REVISION.id - results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) - assert results == [] + rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) + assert rev_results == [] - results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) - assert results == [] + orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) + assert orig_results == [] def test_origin_metadata_indexer_no_metadata( @@ -178,11 +182,11 @@ rev_id = REVISION.id - results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) - assert results == [] + rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) + assert rev_results == [] - results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) - assert results == [] + orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) + assert orig_results == [] def test_origin_metadata_indexer_error( @@ -200,11 +204,11 @@ rev_id = REVISION.id - results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) - assert results == [] + rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) + assert rev_results == [] - results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) - assert results == [] + orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) + assert orig_results == [] def test_origin_metadata_indexer_delete_metadata( @@ -217,20 +221,20 @@ rev_id = REVISION.id - results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) - assert results != [] + rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) + assert rev_results != [] - results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) - assert results != [] + orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) + assert orig_results != [] with patch("swh.indexer.metadata_dictionary.npm.NpmMapping.filename", b"foo.json"): indexer.run([origin]) - results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) - assert results == [] + rev_results = list(indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) + assert rev_results == [] - results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) - assert results == [] + orig_results = list(indexer.idx_storage.origin_intrinsic_metadata_get([origin])) + assert orig_results == [] def test_origin_metadata_indexer_unknown_origin(