Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/filter.py
Show First 20 Lines • Show All 86 Lines • ▼ Show 20 Lines | def _filter_missing_contents( | ||||
""" | """ | ||||
objects_seen = self.objects_seen['content'] | objects_seen = self.objects_seen['content'] | ||||
missing_contents = [] | missing_contents = [] | ||||
for content in contents: | for content in contents: | ||||
if content.sha256 in objects_seen: | if content.sha256 in objects_seen: | ||||
continue | continue | ||||
objects_seen.add(content.sha256) | objects_seen.add(content.sha256) | ||||
missing_contents.append(content.to_dict()) | missing_contents.append(content.hashes()) | ||||
return set(self.storage.content_missing( | return set(self.storage.content_missing( | ||||
missing_contents, | missing_contents, | ||||
key_hash='sha256', | key_hash='sha256', | ||||
)) | )) | ||||
def _filter_missing_skipped_contents(
        self, contents: Iterable[SkippedContent]) -> Set[bytes]:
    """Return the sha1_git of the skipped contents missing from swh storage.

    Entries whose ``sha1_git`` is ``None``, or whose ``sha1_git`` has
    already been recorded in ``self.objects_seen['skipped_content']``, are
    ignored. Every remaining ``sha1_git`` is added to that set as a side
    effect, so a later call will not query the storage for it again.

    Args:
        contents: Skipped contents to check for existence in swh storage.

    Returns:
        The ``sha1_git`` value of each entry that
        ``self.storage.skipped_content_missing`` reports as missing.

    """
    seen = self.objects_seen['skipped_content']
    candidates = []
    for skipped in contents:
        key = skipped.sha1_git
        # No sha1_git means there is nothing to deduplicate on; an
        # already-seen key was handled by an earlier entry or call.
        if key is None or key in seen:
            continue
        seen.add(key)
        # Only the hashes are handed to the storage lookup, not the full
        # to_dict() serialization (presumably a dict of hash name -> digest;
        # confirm against SkippedContent.hashes()).
        candidates.append(skipped.hashes())
    still_missing = self.storage.skipped_content_missing(candidates)
    return {entry.get('sha1_git') for entry in still_missing}
def _filter_missing_ids( | def _filter_missing_ids( | ||||
self, | self, | ||||
object_type: str, | object_type: str, | ||||
Show All 26 Lines |