Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/rehash.py
Show First 20 Lines • Show All 104 Lines • ▼ Show 20 Lines | ) -> Generator[Tuple[Dict[str, Any], List[Any]], Any, None]: | ||||
Yields: | Yields: | ||||
tuple: tuple of (content to update, list of checksums computed) | tuple: tuple of (content to update, list of checksums computed) | ||||
""" | """ | ||||
content_ids = self._read_content_ids(all_contents) | content_ids = self._read_content_ids(all_contents) | ||||
for contents in utils.grouper(content_ids, self.batch_size_retrieve_content): | for contents in utils.grouper(content_ids, self.batch_size_retrieve_content): | ||||
contents_iter = itertools.tee(contents, 2) | contents_iter = itertools.tee(contents, 2) | ||||
try: | try: | ||||
content_metadata = self.storage.content_get_metadata( | content_metadata: Dict[ | ||||
bytes, List[Dict] | |||||
] = self.storage.content_get_metadata( # noqa | |||||
[s for s in contents_iter[0]] | [s for s in contents_iter[0]] | ||||
) | ) | ||||
except Exception: | except Exception: | ||||
self.log.exception("Problem when reading contents metadata.") | self.log.exception("Problem when reading contents metadata.") | ||||
continue | continue | ||||
for content in content_metadata: | for sha1, content_dicts in content_metadata.items(): | ||||
if not content_dicts: | |||||
continue | |||||
content: Dict = content_dicts[0] | |||||
# Recompute checksums provided in compute_checksums options | # Recompute checksums provided in compute_checksums options | ||||
if self.recompute_checksums: | if self.recompute_checksums: | ||||
checksums_to_compute = list(self.compute_checksums) | checksums_to_compute = list(self.compute_checksums) | ||||
else: | else: | ||||
# Compute checksums provided in compute_checksums | # Compute checksums provided in compute_checksums | ||||
# options not already defined for that content | # options not already defined for that content | ||||
checksums_to_compute = [ | checksums_to_compute = [ | ||||
h for h in self.compute_checksums if not content.get(h) | h for h in self.compute_checksums if not content.get(h) | ||||
] | ] | ||||
if not checksums_to_compute: # Nothing to recompute | if not checksums_to_compute: # Nothing to recompute | ||||
continue | continue | ||||
try: | try: | ||||
raw_content = self.objstorage.get(content["sha1"]) | raw_content = self.objstorage.get(sha1) | ||||
except ObjNotFoundError: | except ObjNotFoundError: | ||||
self.log.warning( | self.log.warning("Content %s not found in objstorage!", sha1) | ||||
"Content %s not found in objstorage!" % content["sha1"] | |||||
) | |||||
continue | continue | ||||
content_hashes = hashutil.MultiHash.from_data( | content_hashes = hashutil.MultiHash.from_data( | ||||
raw_content, hash_names=checksums_to_compute | raw_content, hash_names=checksums_to_compute | ||||
).digest() | ).digest() | ||||
content.update(content_hashes) | content.update(content_hashes) | ||||
yield content, checksums_to_compute | yield content, checksums_to_compute | ||||
▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines |