Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/mimetype.py
Show First 20 Lines • Show All 97 Lines • ▼ Show 20 Lines | def persist_index_computations(self, results, policy_update): | ||||
self.idx_storage.content_mimetype_add( | self.idx_storage.content_mimetype_add( | ||||
results, conflict_update=(policy_update == 'update-dups')) | results, conflict_update=(policy_update == 'update-dups')) | ||||
class ContentMimetypeIndexer(MixinMimetypeIndexer, ContentIndexer): | class ContentMimetypeIndexer(MixinMimetypeIndexer, ContentIndexer): | ||||
"""Mimetype Indexer working on list of content identifiers. | """Mimetype Indexer working on list of content identifiers. | ||||
It: | It: | ||||
- (optionally) filters out content already indexed (cf. :callable:`filter`) | - (optionally) filters out content already indexed | ||||
(cf. :func:`filter`) | |||||
- reads content from objstorage per the content's id (sha1) | - reads content from objstorage per the content's id (sha1) | ||||
- computes {mimetype, encoding} from that content | - computes {mimetype, encoding} from that content | ||||
- stores result in storage | - stores result in storage | ||||
FIXME: | FIXME: | ||||
- 1. Rename redundant ContentMimetypeIndexer to MimetypeIndexer | - 1. Rename redundant ContentMimetypeIndexer to MimetypeIndexer | ||||
- 2. Do we keep it afterwards? ~> i think this can be used with the journal | - 2. Do we keep it afterwards? ~> i think this can be used with the journal | ||||
Show All 9 Lines | def filter(self, ids): | ||||
} for sha1 in ids | } for sha1 in ids | ||||
)) | )) | ||||
class MimetypeRangeIndexer(MixinMimetypeIndexer, ContentRangeIndexer): | class MimetypeRangeIndexer(MixinMimetypeIndexer, ContentRangeIndexer): | ||||
"""Mimetype Range Indexer working on range of content identifiers. | """Mimetype Range Indexer working on range of content identifiers. | ||||
It: | It: | ||||
- (optionally) filters out content already indexed (cf :callable:`range`) | - (optionally) filters out content already indexed (cf | ||||
:func:`indexed_contents_in_range`) | |||||
- reads content from objstorage per the content's id (sha1) | - reads content from objstorage per the content's id (sha1) | ||||
- computes {mimetype, encoding} from that content | - computes {mimetype, encoding} from that content | ||||
- stores result in storage | - stores result in storage | ||||
""" | """ | ||||
def indexed_contents_in_range(self, start, end): | def indexed_contents_in_range(self, start, end): | ||||
"""Retrieve indexed content id within range [start, end]. | """Retrieve indexed content id within range [start, end]. | ||||
Show All 13 Lines |