Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/test_storage.py
Show All 30 Lines | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Content, | Content, | ||||
Directory, | Directory, | ||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Release, | Release, | ||||
Revision, | Revision, | ||||
SkippedContent, | |||||
Snapshot, | Snapshot, | ||||
MetadataTargetType, | MetadataTargetType, | ||||
) | ) | ||||
from swh.model.hypothesis_strategies import objects | from swh.model.hypothesis_strategies import objects | ||||
from swh.storage import get_storage | from swh.storage import get_storage | ||||
from swh.storage.converters import origin_url_to_sha1 as sha1 | from swh.storage.converters import origin_url_to_sha1 as sha1 | ||||
from swh.storage.exc import HashCollision, StorageArgumentException | from swh.storage.exc import HashCollision, StorageArgumentException | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
▲ Show 20 Lines • Show All 395 Lines • ▼ Show 20 Lines | def test_content_add_metadata_collision(self, swh_storage, sample_data_model): | ||||
content_hex_hashes(cont1.hashes()), | content_hex_hashes(cont1.hashes()), | ||||
content_hex_hashes(cont1b.hashes()), | content_hex_hashes(cont1b.hashes()), | ||||
] | ] | ||||
assert exc.colliding_content_hashes() == [ | assert exc.colliding_content_hashes() == [ | ||||
cont1.hashes(), | cont1.hashes(), | ||||
cont1b.hashes(), | cont1b.hashes(), | ||||
] | ] | ||||
def test_skipped_content_add(self, swh_storage): | def test_skipped_content_add(self, swh_storage, sample_data_model): | ||||
cont = data.skipped_cont | contents = sample_data_model["skipped_content"][:2] | ||||
cont2 = data.skipped_cont2 | cont = contents[0] | ||||
cont2["blake2s256"] = None | cont2 = attr.evolve(contents[1], blake2s256=None) | ||||
missing = list(swh_storage.skipped_content_missing([cont, cont2])) | contents_dict = [c.to_dict() for c in [cont, cont2]] | ||||
assert missing == [ | missing = list(swh_storage.skipped_content_missing(contents_dict)) | ||||
{ | |||||
"sha1": cont["sha1"], | assert missing == [cont.hashes(), cont2.hashes()] | ||||
"sha1_git": cont["sha1_git"], | |||||
"blake2s256": cont["blake2s256"], | |||||
"sha256": cont["sha256"], | |||||
}, | |||||
{ | |||||
"sha1": cont2["sha1"], | |||||
"sha1_git": cont2["sha1_git"], | |||||
"blake2s256": cont2["blake2s256"], | |||||
"sha256": cont2["sha256"], | |||||
}, | |||||
] | |||||
actual_result = swh_storage.skipped_content_add([cont, cont, cont2]) | actual_result = swh_storage.skipped_content_add([cont, cont, cont2]) | ||||
assert 2 <= actual_result.pop("skipped_content:add") <= 3 | assert 2 <= actual_result.pop("skipped_content:add") <= 3 | ||||
assert actual_result == {} | assert actual_result == {} | ||||
missing = list(swh_storage.skipped_content_missing([cont, cont2])) | missing = list(swh_storage.skipped_content_missing(contents_dict)) | ||||
assert missing == [] | assert missing == [] | ||||
def test_skipped_content_add_missing_hashes(self, swh_storage): | def test_skipped_content_add_missing_hashes(self, swh_storage, sample_data_model): | ||||
cont = data.skipped_cont | cont, cont2 = [ | ||||
cont2 = data.skipped_cont2 | attr.evolve(c, sha1_git=None) | ||||
cont["sha1_git"] = cont2["sha1_git"] = None | for c in sample_data_model["skipped_content"][:2] | ||||
] | |||||
missing = list(swh_storage.skipped_content_missing([cont, cont2])) | contents_dict = [c.to_dict() for c in [cont, cont2]] | ||||
missing = list(swh_storage.skipped_content_missing(contents_dict)) | |||||
assert len(missing) == 2 | assert len(missing) == 2 | ||||
actual_result = swh_storage.skipped_content_add([cont, cont, cont2]) | actual_result = swh_storage.skipped_content_add([cont, cont, cont2]) | ||||
assert 2 <= actual_result.pop("skipped_content:add") <= 3 | assert 2 <= actual_result.pop("skipped_content:add") <= 3 | ||||
assert actual_result == {} | assert actual_result == {} | ||||
missing = list(swh_storage.skipped_content_missing([cont, cont2])) | missing = list(swh_storage.skipped_content_missing(contents_dict)) | ||||
assert missing == [] | assert missing == [] | ||||
def test_skipped_content_missing_partial_hash(self, swh_storage): | def test_skipped_content_missing_partial_hash(self, swh_storage, sample_data_model): | ||||
cont = data.skipped_cont | cont = sample_data_model["skipped_content"][0] | ||||
cont2 = cont.copy() | cont2 = attr.evolve(cont, sha1_git=None) | ||||
cont2["sha1_git"] = None | contents_dict = [c.to_dict() for c in [cont, cont2]] | ||||
missing = list(swh_storage.skipped_content_missing([cont, cont2])) | |||||
missing = list(swh_storage.skipped_content_missing(contents_dict)) | |||||
assert len(missing) == 2 | assert len(missing) == 2 | ||||
actual_result = swh_storage.skipped_content_add([cont]) | actual_result = swh_storage.skipped_content_add([cont]) | ||||
assert actual_result.pop("skipped_content:add") == 1 | assert actual_result.pop("skipped_content:add") == 1 | ||||
assert actual_result == {} | assert actual_result == {} | ||||
missing = list(swh_storage.skipped_content_missing([cont, cont2])) | missing = list(swh_storage.skipped_content_missing(contents_dict)) | ||||
assert missing == [cont2.hashes()] | |||||
assert missing == [ | |||||
{ | |||||
"sha1": cont2["sha1"], | |||||
"sha1_git": cont2["sha1_git"], | |||||
"blake2s256": cont2["blake2s256"], | |||||
"sha256": cont2["sha256"], | |||||
} | |||||
] | |||||
@pytest.mark.property_based | @pytest.mark.property_based | ||||
@settings(deadline=None) # this test is very slow | @settings(deadline=None) # this test is very slow | ||||
@given( | @given( | ||||
strategies.sets( | strategies.sets( | ||||
elements=strategies.sampled_from(["sha256", "sha1_git", "blake2s256"]), | elements=strategies.sampled_from(["sha256", "sha1_git", "blake2s256"]), | ||||
min_size=0, | min_size=0, | ||||
) | ) | ||||
) | ) | ||||
def test_content_missing(self, swh_storage, algos): | def test_content_missing(self, swh_storage, algos): | ||||
algos |= {"sha1"} | algos |= {"sha1"} | ||||
cont2 = data.cont2 | cont = Content.from_dict(data.cont2) | ||||
missing_cont = data.missing_cont | missing_cont = SkippedContent.from_dict(data.missing_cont) | ||||
swh_storage.content_add([cont2]) | swh_storage.content_add([cont]) | ||||
test_contents = [cont2] | |||||
test_contents = [cont.to_dict()] | |||||
missing_per_hash = defaultdict(list) | missing_per_hash = defaultdict(list) | ||||
for i in range(256): | for i in range(256): | ||||
test_content = missing_cont.copy() | test_content = missing_cont.to_dict() | ||||
for hash in algos: | for hash in algos: | ||||
test_content[hash] = bytes([i]) + test_content[hash][1:] | test_content[hash] = bytes([i]) + test_content[hash][1:] | ||||
missing_per_hash[hash].append(test_content[hash]) | missing_per_hash[hash].append(test_content[hash]) | ||||
test_contents.append(test_content) | test_contents.append(test_content) | ||||
assert set(swh_storage.content_missing(test_contents)) == set( | assert set(swh_storage.content_missing(test_contents)) == set( | ||||
missing_per_hash["sha1"] | missing_per_hash["sha1"] | ||||
) | ) | ||||
for hash in algos: | for hash in algos: | ||||
assert set( | assert set( | ||||
swh_storage.content_missing(test_contents, key_hash=hash) | swh_storage.content_missing(test_contents, key_hash=hash) | ||||
) == set(missing_per_hash[hash]) | ) == set(missing_per_hash[hash]) | ||||
@pytest.mark.property_based | @pytest.mark.property_based | ||||
@given( | @given( | ||||
strategies.sets( | strategies.sets( | ||||
elements=strategies.sampled_from(["sha256", "sha1_git", "blake2s256"]), | elements=strategies.sampled_from(["sha256", "sha1_git", "blake2s256"]), | ||||
min_size=0, | min_size=0, | ||||
) | ) | ||||
) | ) | ||||
def test_content_missing_unknown_algo(self, swh_storage, algos): | def test_content_missing_unknown_algo(self, swh_storage, algos): | ||||
algos |= {"sha1"} | algos |= {"sha1"} | ||||
cont2 = data.cont2 | cont = Content.from_dict(data.cont2) | ||||
missing_cont = data.missing_cont | missing_cont = SkippedContent.from_dict(data.missing_cont) | ||||
swh_storage.content_add([cont2]) | swh_storage.content_add([cont]) | ||||
test_contents = [cont2] | |||||
test_contents = [cont.to_dict()] | |||||
missing_per_hash = defaultdict(list) | missing_per_hash = defaultdict(list) | ||||
for i in range(16): | for i in range(16): | ||||
test_content = missing_cont.copy() | test_content = missing_cont.to_dict() | ||||
for hash in algos: | for hash in algos: | ||||
test_content[hash] = bytes([i]) + test_content[hash][1:] | test_content[hash] = bytes([i]) + test_content[hash][1:] | ||||
missing_per_hash[hash].append(test_content[hash]) | missing_per_hash[hash].append(test_content[hash]) | ||||
test_content["nonexisting_algo"] = b"\x00" | test_content["nonexisting_algo"] = b"\x00" | ||||
test_contents.append(test_content) | test_contents.append(test_content) | ||||
assert set(swh_storage.content_missing(test_contents)) == set( | assert set(swh_storage.content_missing(test_contents)) == set( | ||||
missing_per_hash["sha1"] | missing_per_hash["sha1"] | ||||
) | ) | ||||
for hash in algos: | for hash in algos: | ||||
assert set( | assert set( | ||||
swh_storage.content_missing(test_contents, key_hash=hash) | swh_storage.content_missing(test_contents, key_hash=hash) | ||||
) == set(missing_per_hash[hash]) | ) == set(missing_per_hash[hash]) | ||||
def test_content_missing_per_sha1(self, swh_storage): | def test_content_missing_per_sha1(self, swh_storage, sample_data_model): | ||||
# given | # given | ||||
cont2 = data.cont2 | cont = sample_data_model["content"][0] | ||||
missing_cont = data.missing_cont | missing_cont = sample_data_model["skipped_content"][0] | ||||
swh_storage.content_add([cont2]) | swh_storage.content_add([cont]) | ||||
# when | # when | ||||
gen = swh_storage.content_missing_per_sha1( | gen = swh_storage.content_missing_per_sha1([cont.sha1, missing_cont.sha1]) | ||||
[cont2["sha1"], missing_cont["sha1"]] | |||||
) | |||||
# then | # then | ||||
assert list(gen) == [missing_cont["sha1"]] | assert list(gen) == [missing_cont.sha1] | ||||
def test_content_missing_per_sha1_git(self, swh_storage): | def test_content_missing_per_sha1_git(self, swh_storage, sample_data_model): | ||||
cont = data.cont | cont, cont2 = sample_data_model["content"][:2] | ||||
cont2 = data.cont2 | missing_cont = sample_data_model["skipped_content"][0] | ||||
missing_cont = data.missing_cont | |||||
swh_storage.content_add([cont, cont2]) | swh_storage.content_add([cont, cont2]) | ||||
contents = [cont["sha1_git"], cont2["sha1_git"], missing_cont["sha1_git"]] | contents = [cont.sha1_git, cont2.sha1_git, missing_cont.sha1_git] | ||||
missing_contents = swh_storage.content_missing_per_sha1_git(contents) | missing_contents = swh_storage.content_missing_per_sha1_git(contents) | ||||
assert list(missing_contents) == [missing_cont["sha1_git"]] | assert list(missing_contents) == [missing_cont.sha1_git] | ||||
def test_content_get_partition(self, swh_storage, swh_contents): | def test_content_get_partition(self, swh_storage, swh_contents): | ||||
"""content_get_partition paginates results if limit exceeded""" | """content_get_partition paginates results if limit exceeded""" | ||||
expected_contents = [c for c in swh_contents if c["status"] != "absent"] | expected_contents = [c for c in swh_contents if c["status"] != "absent"] | ||||
actual_contents = [] | actual_contents = [] | ||||
for i in range(16): | for i in range(16): | ||||
actual_result = swh_storage.content_get_partition(i, 16) | actual_result = swh_storage.content_get_partition(i, 16) | ||||
▲ Show 20 Lines • Show All 3,659 Lines • Show Last 20 Lines |