diff --git a/swh/indexer/ctags.py b/swh/indexer/ctags.py --- a/swh/indexer/ctags.py +++ b/swh/indexer/ctags.py @@ -74,7 +74,7 @@ } -class CtagsIndexer(ContentIndexer): +class CtagsIndexer(ContentIndexer[Dict]): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.config = merge_configs(DEFAULT_CONFIG, self.config) diff --git a/swh/indexer/fossology_license.py b/swh/indexer/fossology_license.py --- a/swh/indexer/fossology_license.py +++ b/swh/indexer/fossology_license.py @@ -134,7 +134,7 @@ ) -class FossologyLicenseIndexer(MixinFossologyLicenseIndexer, ContentIndexer): +class FossologyLicenseIndexer(MixinFossologyLicenseIndexer, ContentIndexer[Dict]): """Indexer in charge of: - filtering out content already indexed @@ -154,7 +154,7 @@ class FossologyLicensePartitionIndexer( - MixinFossologyLicenseIndexer, ContentPartitionIndexer + MixinFossologyLicenseIndexer, ContentPartitionIndexer[Dict] ): """FossologyLicense Range Indexer working on range/partition of content identifiers. diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py --- a/swh/indexer/indexer.py +++ b/swh/indexer/indexer.py @@ -9,7 +9,7 @@ import os import shutil import tempfile -from typing import Any, Dict, Iterator, List, Optional, Set, Union +from typing import Any, Dict, Generic, Iterator, List, Optional, Set, TypeVar, Union from swh.core import utils from swh.core.config import load_from_envvar, merge_configs @@ -57,7 +57,11 @@ } -class BaseIndexer(metaclass=abc.ABCMeta): +# TODO: should be bound=Optional[BaseRow] when all endpoints move away from dicts +TResult = TypeVar("TResult", bound=Union[None, Dict, BaseRow]) + + +class BaseIndexer(Generic[TResult], metaclass=abc.ABCMeta): """Base class for indexers to inherit from. The main entry point is the :func:`run` function which is in @@ -110,7 +114,7 @@ """ - results: List[Union[Dict, BaseRow]] + results: List[TResult] USE_TOOLS = True @@ -212,7 +216,7 @@ def index( self, id: Union[bytes, Dict, Revision], data: Optional[bytes] = None, **kwargs - ) -> Union[Dict[str, Any], BaseRow]: + ) -> TResult: """Index computation for the id and associated raw data. Args: @@ -257,7 +261,7 @@ return {} -class ContentIndexer(BaseIndexer): +class ContentIndexer(BaseIndexer[TResult], Generic[TResult]): """A content indexer working on a list of ids directly. To work on indexer partition, use the :class:`ContentPartitionIndexer` @@ -318,7 +322,7 @@ return summary -class ContentPartitionIndexer(BaseIndexer): +class ContentPartitionIndexer(BaseIndexer[TResult], Generic[TResult]): """A content partition indexer. This expects as input a partition_id and a nb_partitions. This will then index the @@ -386,7 +390,7 @@ def _index_contents( self, partition_id: int, nb_partitions: int, indexed: Set[Sha1], **kwargs: Any - ) -> Iterator[Union[BaseRow, Dict]]: + ) -> Iterator[TResult]: """Index the contents within the partition_id. Args: @@ -416,7 +420,7 @@ def _index_with_skipping_already_done( self, partition_id: int, nb_partitions: int - ) -> Iterator[Union[BaseRow, Dict]]: + ) -> Iterator[TResult]: """Index not already indexed contents within the partition partition_id Args: @@ -495,7 +499,7 @@ return summary -class OriginIndexer(BaseIndexer): +class OriginIndexer(BaseIndexer[TResult], Generic[TResult]): """An object type indexer, inherits from the :class:`BaseIndexer` and implements Origin indexing using the run method @@ -540,9 +544,7 @@ summary.update(summary_persist) return summary - def index_list( - self, origins: List[Any], **kwargs: Any - ) -> List[Union[Dict, BaseRow]]: + def index_list(self, origins: List[Any], **kwargs: Any) -> List[TResult]: results = [] for origin in origins: try: @@ -555,7 +557,7 @@ return results -class RevisionIndexer(BaseIndexer): +class RevisionIndexer(BaseIndexer[TResult], Generic[TResult]): """An object type indexer, inherits from the :class:`BaseIndexer` and implements Revision indexing using the run method diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py --- a/swh/indexer/metadata.py +++ b/swh/indexer/metadata.py @@ -32,7 +32,7 @@ yield from f(list(group)) -class ContentMetadataIndexer(ContentIndexer): +class ContentMetadataIndexer(ContentIndexer[Dict]): """Content-level indexer This indexer is in charge of: @@ -111,7 +111,7 @@ } -class RevisionMetadataIndexer(RevisionIndexer): +class RevisionMetadataIndexer(RevisionIndexer[Dict]): """Revision-level indexer This indexer is in charge of: @@ -268,7 +268,6 @@ ) # on the fly possibility: for result in c_metadata_indexer.results: - assert isinstance(result, dict) # TODO: remove this local_metadata = result["metadata"] metadata.append(local_metadata) @@ -279,7 +278,7 @@ return (used_mappings, metadata) -class OriginMetadataIndexer(OriginIndexer): +class OriginMetadataIndexer(OriginIndexer[Dict]): USE_TOOLS = False def __init__(self, config=None, **kwargs) -> None: diff --git a/swh/indexer/mimetype.py b/swh/indexer/mimetype.py --- a/swh/indexer/mimetype.py +++ b/swh/indexer/mimetype.py @@ -112,7 +112,7 @@ ) -class MimetypeIndexer(MixinMimetypeIndexer, ContentIndexer): +class MimetypeIndexer(MixinMimetypeIndexer, ContentIndexer[ContentMimetypeRow]): """Mimetype Indexer working on list of content identifiers. It: @@ -134,7 +134,9 @@ ) -class MimetypePartitionIndexer(MixinMimetypeIndexer, ContentPartitionIndexer): +class MimetypePartitionIndexer( + MixinMimetypeIndexer, ContentPartitionIndexer[ContentMimetypeRow] +): """Mimetype Range Indexer working on range of content identifiers. It: diff --git a/swh/indexer/origin_head.py b/swh/indexer/origin_head.py --- a/swh/indexer/origin_head.py +++ b/swh/indexer/origin_head.py @@ -5,7 +5,7 @@ import logging import re -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union import click @@ -15,7 +15,7 @@ from swh.storage.algos.snapshot import snapshot_get_all_branches -class OriginHeadIndexer(OriginIndexer): +class OriginHeadIndexer(OriginIndexer[Optional[Dict]]): """Origin-level indexer. This indexer is in charge of looking up the revision that acts as the