Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/indexer.py
Show First 20 Lines • Show All 282 Lines • ▼ Show 20 Lines | ) -> Dict: | ||||
respectively update duplicates or ignore | respectively update duplicates or ignore | ||||
them | them | ||||
**kwargs: passed to the `index` method | **kwargs: passed to the `index` method | ||||
Returns: | Returns: | ||||
A summary Dict of the task's status | A summary Dict of the task's status | ||||
""" | """ | ||||
status = "uneventful" | |||||
sha1s = [ | sha1s = [ | ||||
hashutil.hash_to_bytes(id_) if isinstance(id_, str) else id_ for id_ in ids | hashutil.hash_to_bytes(id_) if isinstance(id_, str) else id_ for id_ in ids | ||||
] | ] | ||||
results = [] | results = [] | ||||
summary: Dict = {} | summary: Dict = {"status": "uneventful"} | ||||
try: | try: | ||||
for sha1 in sha1s: | for sha1 in sha1s: | ||||
try: | try: | ||||
raw_content = self.objstorage.get(sha1) | raw_content = self.objstorage.get(sha1) | ||||
except ObjNotFoundError: | except ObjNotFoundError: | ||||
self.log.warning( | self.log.warning( | ||||
"Content %s not found in objstorage" | "Content %s not found in objstorage" | ||||
% hashutil.hash_to_hex(sha1) | % hashutil.hash_to_hex(sha1) | ||||
) | ) | ||||
continue | continue | ||||
res = self.index(sha1, raw_content, **kwargs) | res = self.index(sha1, raw_content, **kwargs) | ||||
if res: # If no results, skip it | if res: # If no results, skip it | ||||
results.append(res) | results.append(res) | ||||
status = "eventful" | summary["status"] = "eventful" | ||||
summary = self.persist_index_computations(results, policy_update) | summary = self.persist_index_computations(results, policy_update) | ||||
self.results = results | self.results = results | ||||
except Exception: | except Exception: | ||||
if not self.catch_exceptions: | if not self.catch_exceptions: | ||||
raise | raise | ||||
self.log.exception("Problem when reading contents metadata.") | self.log.exception("Problem when reading contents metadata.") | ||||
status = "failed" | summary["status"] = "failed" | ||||
finally: | |||||
summary["status"] = status | |||||
return summary | return summary | ||||
class ContentPartitionIndexer(BaseIndexer): | class ContentPartitionIndexer(BaseIndexer): | ||||
"""A content partition indexer. | """A content partition indexer. | ||||
This expects as input a partition_id and a nb_partitions. This will then index the | This expects as input a partition_id and a nb_partitions. This will then index the | ||||
contents within that partition. | contents within that partition. | ||||
▲ Show 20 Lines • Show All 282 Lines • Show Last 20 Lines |