diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -134,12 +134,11 @@ """List content missing from storage Args: - contents ([dict]): iterable of dictionaries containing one - key for each checksum algorithm in - :data:`swh.model.hashutil.ALGORITHMS`, - mapped to the corresponding checksum, - and a length key mapped to the content - length. + contents ([dict]): iterable of dictionaries whose keys are + either 'length' or an item of + :data:`swh.model.hashutil.ALGORITHMS`; + mapped to the corresponding checksum + (or length). key_hash (str): name of the column to use as hash id result (default: 'sha1') @@ -149,8 +148,17 @@ key_hash column) """ for content in contents: - if self._content_key(content) not in self._contents: - yield content[key_hash] + for (algo, hash_) in content.items(): + if algo not in DEFAULT_ALGORITHMS: + continue + if hash_ not in self._content_indexes.get(algo, []): + yield content[key_hash] + break + else: + # content_find cannot return None here, because we checked + # above that there is a content with matching hashes. + if self.content_find(content)['status'] == 'missing': + yield content[key_hash] def content_missing_per_sha1(self, contents): """List content missing from storage based only on sha1. diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -305,12 +305,11 @@ """List content missing from storage Args: - content ([dict]): iterable of dictionaries containing one - key for each checksum algorithm in - :data:`swh.model.hashutil.ALGORITHMS`, - mapped to the corresponding checksum, - and a length key mapped to the content - length. + content ([dict]): iterable of dictionaries whose keys are + either 'length' or an item of + :data:`swh.model.hashutil.ALGORITHMS`; + mapped to the corresponding checksum + (or length). key_hash (str): name of the column to use as hash id result (default: 'sha1') diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -11,7 +11,7 @@ import pytest -from hypothesis import given +from hypothesis import given, strategies from swh.model import from_disk, identifiers from swh.model.hashutil import hash_to_bytes @@ -609,7 +609,13 @@ 'Content too long') ) - def test_content_missing(self): + @pytest.mark.property_based + @given(strategies.sets( + elements=strategies.sampled_from( + ['sha256', 'sha1_git', 'blake2s256']), + min_size=0)) + def test_content_missing(self, algos): + algos |= {'sha1'} cont2 = self.cont2 missing_cont = self.missing_cont self.storage.content_add([cont2]) @@ -617,7 +623,7 @@ missing_per_hash = defaultdict(list) for i in range(256): test_content = missing_cont.copy() - for hash in ['sha1', 'sha256', 'sha1_git', 'blake2s256']: + for hash in algos: test_content[hash] = bytes([i]) + test_content[hash][1:] missing_per_hash[hash].append(test_content[hash]) test_contents.append(test_content) @@ -627,7 +633,7 @@ missing_per_hash['sha1'] ) - for hash in ['sha1', 'sha256', 'sha1_git', 'blake2s256']: + for hash in algos: self.assertCountEqual( self.storage.content_missing(test_contents, key_hash=hash), missing_per_hash[hash]