diff --git a/swh/storage/filter.py b/swh/storage/filter.py
--- a/swh/storage/filter.py
+++ b/swh/storage/filter.py
@@ -49,7 +49,10 @@
         contents = list(content)
         contents_to_add = self._filter_missing_skipped_contents(contents)
         return self.storage.skipped_content_add(
-            x for x in contents if x['sha1_git'] in contents_to_add
+            # No sha1_git means no dedup key: such entries are always sent.
+            entry for entry in contents
+            if entry.get('sha1_git') is None
+            or entry['sha1_git'] in contents_to_add
         )
 
     def directory_add(self, directories: Iterable[Dict]) -> Dict:
@@ -106,7 +109,9 @@
         objects_seen = self.objects_seen['skipped_content']
         missing_hashes = []
         for hashes in content_hashes:
-            if hashes['sha1_git'] in objects_seen:
+            # Ignore entries lacking a sha1_git as well as already-seen ones.
+            if (hashes.get('sha1_git') is None
+                    or hashes['sha1_git'] in objects_seen):
                 continue
             objects_seen.add(hashes['sha1_git'])
             missing_hashes.append(hashes)
diff --git a/swh/storage/tests/test_filter.py b/swh/storage/tests/test_filter.py
--- a/swh/storage/tests/test_filter.py
+++ b/swh/storage/tests/test_filter.py
@@ -59,6 +59,34 @@
     }
 
 
+def test_filtering_proxy_storage_skipped_content_missing_sha1_git(sample_data):
+    """Skipped contents whose sha1_git is None go through the proxy."""
+    skipped = sample_data['skipped_content'][0]
+    skipped2 = sample_data['skipped_content'][1]
+    proxy = FilteringProxyStorage(storage=storage_config)
+
+    # Both sample entries lose their sha1_git (the dicts are mutated in place).
+    skipped['sha1_git'] = skipped2['sha1_git'] = None
+    missing = next(proxy.skipped_content_missing([skipped]))
+    assert missing['sha1'] == skipped['sha1']
+
+    summary = proxy.skipped_content_add([skipped])
+    assert summary == {
+        'skipped_content:add': 1,
+    }
+
+    missing = list(proxy.skipped_content_missing([skipped]))
+    assert missing == []
+
+    summary = proxy.skipped_content_add([skipped2])
+    assert summary == {
+        'skipped_content:add': 1,
+    }
+
+    missing = list(proxy.skipped_content_missing([skipped2]))
+    assert missing == []
+
+
 def test_filtering_proxy_storage_revision(sample_data):
     sample_revision = sample_data['revision'][0]
     storage = FilteringProxyStorage(storage=storage_config)