Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/test_storage.py
Show First 20 Lines • Show All 3,043 Lines • ▼ Show 20 Lines | |||||
def test_content_find_ctime(self, swh_storage, sample_data): | def test_content_find_ctime(self, swh_storage, sample_data): | ||||
origin_content = sample_data.content | origin_content = sample_data.content | ||||
ctime = round_to_milliseconds(now()) | ctime = round_to_milliseconds(now()) | ||||
content = attr.evolve(origin_content, data=None, ctime=ctime) | content = attr.evolve(origin_content, data=None, ctime=ctime) | ||||
swh_storage.content_add_metadata([content]) | swh_storage.content_add_metadata([content]) | ||||
actually_present = swh_storage.content_find({"sha1": content.sha1}) | actually_present = swh_storage.content_find({"sha1": content.sha1}) | ||||
assert actually_present[0] == content.to_dict() | assert actually_present[0] == content | ||||
def test_content_find_with_present_content(self, swh_storage, sample_data): | def test_content_find_with_present_content(self, swh_storage, sample_data): | ||||
content = sample_data.content | content = sample_data.content | ||||
expected_content = content.to_dict() | expected_content = attr.evolve(content, data=None) | ||||
del expected_content["data"] | |||||
del expected_content["ctime"] | |||||
# 1. with something to find | # 1. with something to find | ||||
swh_storage.content_add([content]) | swh_storage.content_add([content]) | ||||
actually_present = swh_storage.content_find({"sha1": content.sha1}) | actually_present = swh_storage.content_find({"sha1": content.sha1}) | ||||
assert 1 == len(actually_present) | assert 1 == len(actually_present) | ||||
actually_present[0].pop("ctime") | |||||
assert actually_present[0] == expected_content | assert actually_present[0] == expected_content | ||||
# 2. with something to find | # 2. with something to find | ||||
actually_present = swh_storage.content_find({"sha1_git": content.sha1_git}) | actually_present = swh_storage.content_find({"sha1_git": content.sha1_git}) | ||||
assert 1 == len(actually_present) | assert 1 == len(actually_present) | ||||
actually_present[0].pop("ctime") | |||||
assert actually_present[0] == expected_content | assert actually_present[0] == expected_content | ||||
# 3. with something to find | # 3. with something to find | ||||
actually_present = swh_storage.content_find({"sha256": content.sha256}) | actually_present = swh_storage.content_find({"sha256": content.sha256}) | ||||
assert 1 == len(actually_present) | assert 1 == len(actually_present) | ||||
actually_present[0].pop("ctime") | |||||
assert actually_present[0] == expected_content | assert actually_present[0] == expected_content | ||||
# 4. with something to find | # 4. with something to find | ||||
actually_present = swh_storage.content_find(content.hashes()) | actually_present = swh_storage.content_find(content.hashes()) | ||||
assert 1 == len(actually_present) | assert 1 == len(actually_present) | ||||
actually_present[0].pop("ctime") | |||||
assert actually_present[0] == expected_content | assert actually_present[0] == expected_content | ||||
def test_content_find_with_non_present_content(self, swh_storage, sample_data): | def test_content_find_with_non_present_content(self, swh_storage, sample_data): | ||||
missing_content = sample_data.skipped_content | missing_content = sample_data.skipped_content | ||||
# 1. with something that does not exist | # 1. with something that does not exist | ||||
actually_present = swh_storage.content_find({"sha1": missing_content.sha1}) | actually_present = swh_storage.content_find({"sha1": missing_content.sha1}) | ||||
assert actually_present == [] | assert actually_present == [] | ||||
# 2. with something that does not exist | # 2. with something that does not exist | ||||
actually_present = swh_storage.content_find( | actually_present = swh_storage.content_find( | ||||
{"sha1_git": missing_content.sha1_git} | {"sha1_git": missing_content.sha1_git} | ||||
) | ) | ||||
assert actually_present == [] | assert actually_present == [] | ||||
Show All 11 Lines | def test_content_find_with_duplicate_input(self, swh_storage, sample_data): | ||||
sha1git_array[0] += 1 | sha1git_array[0] += 1 | ||||
duplicated_content = attr.evolve( | duplicated_content = attr.evolve( | ||||
content, sha1=bytes(sha1_array), sha1_git=bytes(sha1git_array) | content, sha1=bytes(sha1_array), sha1_git=bytes(sha1git_array) | ||||
) | ) | ||||
# Inject the data | # Inject the data | ||||
swh_storage.content_add([content, duplicated_content]) | swh_storage.content_add([content, duplicated_content]) | ||||
actual_result = list( | actual_result = swh_storage.content_find( | ||||
swh_storage.content_find( | |||||
{ | { | ||||
"blake2s256": duplicated_content.blake2s256, | "blake2s256": duplicated_content.blake2s256, | ||||
"sha256": duplicated_content.sha256, | "sha256": duplicated_content.sha256, | ||||
} | } | ||||
) | ) | ||||
) | |||||
expected_content = content.to_dict() | expected_content = attr.evolve(content, data=None) | ||||
expected_duplicated_content = duplicated_content.to_dict() | expected_duplicated_content = attr.evolve(duplicated_content, data=None) | ||||
for key in ["data", "ctime"]: # so we can compare | for result in actual_result: | ||||
for dict_ in [ | assert result in [expected_content, expected_duplicated_content] | ||||
expected_content, | |||||
expected_duplicated_content, | |||||
actual_result[0], | |||||
actual_result[1], | |||||
]: | |||||
dict_.pop(key, None) | |||||
expected_result = [expected_content, expected_duplicated_content] | |||||
for result in expected_result: | |||||
assert result in actual_result | |||||
def test_content_find_with_duplicate_sha256(self, swh_storage, sample_data): | def test_content_find_with_duplicate_sha256(self, swh_storage, sample_data): | ||||
content = sample_data.content | content = sample_data.content | ||||
hashes = {} | hashes = {} | ||||
# Create fake data with colliding sha256 | # Create fake data with colliding sha256 | ||||
for hashalgo in ("sha1", "sha1_git", "blake2s256"): | for hashalgo in ("sha1", "sha1_git", "blake2s256"): | ||||
value = bytearray(getattr(content, hashalgo)) | value = bytearray(getattr(content, hashalgo)) | ||||
value[0] += 1 | value[0] += 1 | ||||
hashes[hashalgo] = bytes(value) | hashes[hashalgo] = bytes(value) | ||||
duplicated_content = attr.evolve( | duplicated_content = attr.evolve( | ||||
content, | content, | ||||
sha1=hashes["sha1"], | sha1=hashes["sha1"], | ||||
sha1_git=hashes["sha1_git"], | sha1_git=hashes["sha1_git"], | ||||
blake2s256=hashes["blake2s256"], | blake2s256=hashes["blake2s256"], | ||||
) | ) | ||||
swh_storage.content_add([content, duplicated_content]) | swh_storage.content_add([content, duplicated_content]) | ||||
actual_result = list( | actual_result = swh_storage.content_find({"sha256": duplicated_content.sha256}) | ||||
swh_storage.content_find({"sha256": duplicated_content.sha256}) | |||||
) | |||||
assert len(actual_result) == 2 | assert len(actual_result) == 2 | ||||
expected_content = content.to_dict() | expected_content = attr.evolve(content, data=None) | ||||
expected_duplicated_content = duplicated_content.to_dict() | expected_duplicated_content = attr.evolve(duplicated_content, data=None) | ||||
for key in ["data", "ctime"]: # so we can compare | for result in actual_result: | ||||
for dict_ in [ | assert result in [expected_content, expected_duplicated_content] | ||||
expected_content, | |||||
expected_duplicated_content, | |||||
actual_result[0], | |||||
actual_result[1], | |||||
]: | |||||
dict_.pop(key, None) | |||||
assert sorted(actual_result, key=lambda x: x["sha1"]) == [ | |||||
expected_content, | |||||
expected_duplicated_content, | |||||
] | |||||
# Find with both sha256 and blake2s256 | # Find with both sha256 and blake2s256 | ||||
actual_result = list( | actual_result = swh_storage.content_find( | ||||
swh_storage.content_find( | |||||
{ | { | ||||
"sha256": duplicated_content.sha256, | "sha256": duplicated_content.sha256, | ||||
"blake2s256": duplicated_content.blake2s256, | "blake2s256": duplicated_content.blake2s256, | ||||
} | } | ||||
) | ) | ||||
) | |||||
assert len(actual_result) == 1 | assert len(actual_result) == 1 | ||||
actual_result[0].pop("ctime") | |||||
assert actual_result == [expected_duplicated_content] | assert actual_result == [expected_duplicated_content] | ||||
def test_content_find_with_duplicate_blake2s256(self, swh_storage, sample_data): | def test_content_find_with_duplicate_blake2s256(self, swh_storage, sample_data): | ||||
content = sample_data.content | content = sample_data.content | ||||
# Create fake data with colliding blake2s256 only (sha1, sha1_git and sha256 differ) | # Create fake data with colliding blake2s256 only (sha1, sha1_git and sha256 differ) | ||||
sha1_array = bytearray(content.sha1) | sha1_array = bytearray(content.sha1) | ||||
sha1_array[0] += 1 | sha1_array[0] += 1 | ||||
sha1git_array = bytearray(content.sha1_git) | sha1git_array = bytearray(content.sha1_git) | ||||
sha1git_array[0] += 1 | sha1git_array[0] += 1 | ||||
sha256_array = bytearray(content.sha256) | sha256_array = bytearray(content.sha256) | ||||
sha256_array[0] += 1 | sha256_array[0] += 1 | ||||
duplicated_content = attr.evolve( | duplicated_content = attr.evolve( | ||||
content, | content, | ||||
sha1=bytes(sha1_array), | sha1=bytes(sha1_array), | ||||
sha1_git=bytes(sha1git_array), | sha1_git=bytes(sha1git_array), | ||||
sha256=bytes(sha256_array), | sha256=bytes(sha256_array), | ||||
) | ) | ||||
swh_storage.content_add([content, duplicated_content]) | swh_storage.content_add([content, duplicated_content]) | ||||
actual_result = list( | actual_result = swh_storage.content_find( | ||||
swh_storage.content_find({"blake2s256": duplicated_content.blake2s256}) | {"blake2s256": duplicated_content.blake2s256} | ||||
) | ) | ||||
expected_content = content.to_dict() | expected_content = attr.evolve(content, data=None) | ||||
expected_duplicated_content = duplicated_content.to_dict() | expected_duplicated_content = attr.evolve(duplicated_content, data=None) | ||||
for key in ["data", "ctime"]: # so we can compare | for result in actual_result: | ||||
for dict_ in [ | assert result in [expected_content, expected_duplicated_content] | ||||
expected_content, | |||||
expected_duplicated_content, | |||||
actual_result[0], | |||||
actual_result[1], | |||||
]: | |||||
dict_.pop(key, None) | |||||
expected_result = [expected_content, expected_duplicated_content] | |||||
for result in expected_result: | |||||
assert result in actual_result | |||||
# Find with both sha256 and blake2s256 | # Find with both sha256 and blake2s256 | ||||
actual_result = list( | actual_result = swh_storage.content_find( | ||||
swh_storage.content_find( | |||||
{ | { | ||||
"sha256": duplicated_content.sha256, | "sha256": duplicated_content.sha256, | ||||
"blake2s256": duplicated_content.blake2s256, | "blake2s256": duplicated_content.blake2s256, | ||||
} | } | ||||
) | ) | ||||
) | |||||
actual_result[0].pop("ctime") | |||||
assert actual_result == [expected_duplicated_content] | assert actual_result == [expected_duplicated_content] | ||||
def test_content_find_bad_input(self, swh_storage): | def test_content_find_bad_input(self, swh_storage): | ||||
# 1. with bad input | # 1. with no hash to lookup | ||||
with pytest.raises(StorageArgumentException): | with pytest.raises(StorageArgumentException): | ||||
swh_storage.content_find({}) # empty is bad | swh_storage.content_find({}) # need at least one hash | ||||
# 2. with bad input | # 2. with bad hash | ||||
with pytest.raises(StorageArgumentException): | with pytest.raises(StorageArgumentException): | ||||
swh_storage.content_find({"unknown-sha1": "something"}) # not the right key | swh_storage.content_find({"unknown-sha1": "something"}) # not the right key | ||||
def test_object_find_by_sha1_git(self, swh_storage, sample_data): | def test_object_find_by_sha1_git(self, swh_storage, sample_data): | ||||
content = sample_data.content | content = sample_data.content | ||||
directory = sample_data.directory | directory = sample_data.directory | ||||
revision = sample_data.revision | revision = sample_data.revision | ||||
release = sample_data.release | release = sample_data.release | ||||
▲ Show 20 Lines • Show All 1,057 Lines • Show Last 20 Lines |