Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/rehash.py
Show First 20 Lines • Show All 91 Lines • ▼ Show 20 Lines | def _read_content_ids(self, contents): | ||||
yield h | yield h | ||||
def get_new_contents_metadata(self, all_contents): | def get_new_contents_metadata(self, all_contents): | ||||
"""Retrieve raw contents and compute new checksums on the | """Retrieve raw contents and compute new checksums on the | ||||
contents. Unknown or corrupted contents are skipped. | contents. Unknown or corrupted contents are skipped. | ||||
Args: | Args: | ||||
all_contents ([dict]): List of contents as dictionary with | all_contents ([dict]): List of contents as dictionary with | ||||
the necessary primary keys | the necessary primary keys | ||||
checksum_algorithms ([str]): List of checksums to compute | checksum_algorithms ([str]): List of checksums to compute | ||||
Yields: | Yields: | ||||
tuple of: content to update, list of checksums computed | tuple: tuple of (content to update, list of checksums computed) | ||||
""" | """ | ||||
content_ids = self._read_content_ids(all_contents) | content_ids = self._read_content_ids(all_contents) | ||||
for contents in utils.grouper(content_ids, | for contents in utils.grouper(content_ids, | ||||
self.batch_size_retrieve_content): | self.batch_size_retrieve_content): | ||||
contents_iter = itertools.tee(contents, 2) | contents_iter = itertools.tee(contents, 2) | ||||
try: | try: | ||||
content_metadata = self.storage.content_get_metadata( | content_metadata = self.storage.content_get_metadata( | ||||
▲ Show 20 Lines • Show All 60 Lines • Show Last 20 Lines |