Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/mimetype.py
Show First 20 Lines • Show All 106 Lines • ▼ Show 20 Lines | ) -> Dict[str, int]: | ||||
respectively update duplicates or ignore them | respectively update duplicates or ignore them | ||||
""" | """ | ||||
return self.idx_storage.content_mimetype_add( | return self.idx_storage.content_mimetype_add( | ||||
results, conflict_update=(policy_update == "update-dups") | results, conflict_update=(policy_update == "update-dups") | ||||
) | ) | ||||
class MimetypeIndexer(MixinMimetypeIndexer, ContentIndexer): | class MimetypeIndexer(MixinMimetypeIndexer, ContentIndexer[ContentMimetypeRow]): | ||||
"""Mimetype Indexer working on list of content identifiers. | """Mimetype Indexer working on list of content identifiers. | ||||
It: | It: | ||||
- (optionally) filters out content already indexed (cf. | - (optionally) filters out content already indexed (cf. | ||||
:meth:`.filter`) | :meth:`.filter`) | ||||
- reads content from objstorage per the content's id (sha1) | - reads content from objstorage per the content's id (sha1) | ||||
- computes {mimetype, encoding} from that content | - computes {mimetype, encoding} from that content | ||||
- stores result in storage | - stores result in storage | ||||
""" | """ | ||||
def filter(self, ids): | def filter(self, ids): | ||||
"""Filter out known sha1s and return only missing ones. | """Filter out known sha1s and return only missing ones. | ||||
""" | """ | ||||
yield from self.idx_storage.content_mimetype_missing( | yield from self.idx_storage.content_mimetype_missing( | ||||
({"id": sha1, "indexer_configuration_id": self.tool["id"],} for sha1 in ids) | ({"id": sha1, "indexer_configuration_id": self.tool["id"],} for sha1 in ids) | ||||
) | ) | ||||
class MimetypePartitionIndexer(MixinMimetypeIndexer, ContentPartitionIndexer): | class MimetypePartitionIndexer( | ||||
MixinMimetypeIndexer, ContentPartitionIndexer[ContentMimetypeRow] | |||||
): | |||||
"""Mimetype Range Indexer working on range of content identifiers. | """Mimetype Range Indexer working on range of content identifiers. | ||||
It: | It: | ||||
- (optionally) filters out content already indexed (cf. | - (optionally) filters out content already indexed (cf. | ||||
:meth:`.indexed_contents_in_partition`) | :meth:`.indexed_contents_in_partition`) | ||||
- reads content from objstorage per the content's id (sha1) | - reads content from objstorage per the content's id (sha1) | ||||
- computes {mimetype, encoding} from that content | - computes {mimetype, encoding} from that content | ||||
Show All 22 Lines |