diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py --- a/swh/indexer/storage/__init__.py +++ b/swh/indexer/storage/__init__.py @@ -253,13 +253,6 @@ db=None, cur=None, ) -> Dict[str, int]: - """Add mimetypes to the storage (if conflict_update is True, this will - override existing data if any). - - Returns: - A dict with the number of new elements added to the storage. - - """ check_id_duplicates(mimetypes) mimetypes.sort(key=lambda m: m.id) db.mktemp_content_mimetype(cur) diff --git a/swh/indexer/storage/interface.py b/swh/indexer/storage/interface.py --- a/swh/indexer/storage/interface.py +++ b/swh/indexer/storage/interface.py @@ -78,7 +78,8 @@ """Add mimetypes not present in storage. Args: - mimetypes: mimetype rows to be added + mimetypes: mimetype rows to be added, each with a `tool` attribute + that is not None. conflict_update: Flag to determine if we want to overwrite (``True``) or skip duplicates (``False``, the default) @@ -94,15 +95,10 @@ """Retrieve full content mimetype per ids. Args: - ids (iterable): sha1 identifier + ids: sha1 identifiers Yields: - mimetypes (iterable): dictionaries with keys: - - - **id** (bytes): sha1 identifier - - **mimetype** (bytes): raw content's mimetype - - **encoding** (bytes): raw content's encoding - - **tool** (dict): Tool used to compute the language + mimetype row objects """ ...