Page Menu | Home | Software Heritage

D680.diff
No One | Temporary

D680.diff

diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -134,12 +134,11 @@
"""List content missing from storage
Args:
- contents ([dict]): iterable of dictionaries containing one
- key for each checksum algorithm in
- :data:`swh.model.hashutil.ALGORITHMS`,
- mapped to the corresponding checksum,
- and a length key mapped to the content
- length.
+ contents ([dict]): iterable of dictionaries whose keys are
+ either 'length' or an item of
+ :data:`swh.model.hashutil.ALGORITHMS`;
+ mapped to the corresponding checksum
+ (or length).
key_hash (str): name of the column to use as hash id
result (default: 'sha1')
@@ -149,8 +148,17 @@
key_hash column)
"""
for content in contents:
- if self._content_key(content) not in self._contents:
- yield content[key_hash]
+ for (algo, hash_) in content.items():
+ if algo not in DEFAULT_ALGORITHMS:
+ continue
+ if hash_ not in self._content_indexes.get(algo, []):
+ yield content[key_hash]
+ break
+ else:
+ # content_find cannot return None here, because we checked
+ # above that there is a content with matching hashes.
+ if self.content_find(content)['status'] == 'missing':
+ yield content[key_hash]
def content_missing_per_sha1(self, contents):
"""List content missing from storage based only on sha1.
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -305,12 +305,11 @@
"""List content missing from storage
Args:
- content ([dict]): iterable of dictionaries containing one
- key for each checksum algorithm in
- :data:`swh.model.hashutil.ALGORITHMS`,
- mapped to the corresponding checksum,
- and a length key mapped to the content
- length.
+ content ([dict]): iterable of dictionaries whose keys are
+ either 'length' or an item of
+ :data:`swh.model.hashutil.ALGORITHMS`;
+ mapped to the corresponding checksum
+ (or length).
key_hash (str): name of the column to use as hash id
result (default: 'sha1')
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -11,7 +11,7 @@
import pytest
-from hypothesis import given
+from hypothesis import given, strategies
from swh.model import from_disk, identifiers
from swh.model.hashutil import hash_to_bytes
@@ -609,7 +609,13 @@
'Content too long')
)
- def test_content_missing(self):
+ @pytest.mark.property_based
+ @given(strategies.sets(
+ elements=strategies.sampled_from(
+ ['sha256', 'sha1_git', 'blake2s256']),
+ min_size=0))
+ def test_content_missing(self, algos):
+ algos |= {'sha1'}
cont2 = self.cont2
missing_cont = self.missing_cont
self.storage.content_add([cont2])
@@ -617,7 +623,7 @@
missing_per_hash = defaultdict(list)
for i in range(256):
test_content = missing_cont.copy()
- for hash in ['sha1', 'sha256', 'sha1_git', 'blake2s256']:
+ for hash in algos:
test_content[hash] = bytes([i]) + test_content[hash][1:]
missing_per_hash[hash].append(test_content[hash])
test_contents.append(test_content)
@@ -627,7 +633,7 @@
missing_per_hash['sha1']
)
- for hash in ['sha1', 'sha256', 'sha1_git', 'blake2s256']:
+ for hash in algos:
self.assertCountEqual(
self.storage.content_missing(test_contents, key_hash=hash),
missing_per_hash[hash]

File Metadata

Mime Type
text/plain
Expires
Jul 27 2024, 12:50 PM (11 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224193

Event Timeline