Changeset View
Standalone View
swh/storage/tests/storage_tests.py
Show All 25 Lines | from swh.model.model import ( | ||||
Directory, | Directory, | ||||
ExtID, | ExtID, | ||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Person, | Person, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
Revision, | Revision, | ||||
RevisionType, | |||||
SkippedContent, | SkippedContent, | ||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | SnapshotBranch, | ||||
TargetType, | TargetType, | ||||
Timestamp, | Timestamp, | ||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
) | ) | ||||
from swh.model.swhids import CoreSWHID, ObjectType | from swh.model.swhids import CoreSWHID, ObjectType | ||||
▲ Show 20 Lines • Show All 674 Lines • ▼ Show 20 Lines | def test_directory_add(self, swh_storage, sample_data): | ||||
assert after_missing == [] | assert after_missing == [] | ||||
if isinstance(swh_storage, InMemoryStorage) or not isinstance( | if isinstance(swh_storage, InMemoryStorage) or not isinstance( | ||||
swh_storage, CassandraStorage | swh_storage, CassandraStorage | ||||
): | ): | ||||
swh_storage.refresh_stat_counters() | swh_storage.refresh_stat_counters() | ||||
assert swh_storage.stat_counters()["directory"] == 1 | assert swh_storage.stat_counters()["directory"] == 1 | ||||
def test_directory_add_with_raw_manifest(self, swh_storage, sample_data):
    """Add a directory that carries a ``raw_manifest`` and list it back.

    The sample directory is evolved with ``raw_manifest=b"foo"`` and its id
    is then replaced by the value of ``compute_hash()`` on the evolved
    object.  The test checks that the directory is reported missing before
    the insertion and present afterwards, that it is written to the journal,
    and that every entry returned by ``directory_ls`` is one of the entries
    produced by ``transform_entries``.
    """
    blob = sample_data.content
    with_manifest = attr.evolve(sample_data.directory, raw_manifest=b"foo")
    # Re-derive the id only once the manifest is set on the object.
    with_manifest = attr.evolve(with_manifest, id=with_manifest.compute_hash())
    assert with_manifest.entries[0].target == blob.sha1_git
    swh_storage.content_add([blob])

    dir_id = with_manifest.id

    # Nothing has been inserted yet, so the id must be reported missing.
    assert [dir_id] == list(swh_storage.directory_missing([dir_id]))

    assert swh_storage.directory_add([with_manifest]) == {"directory:add": 1}

    journal_objects = list(swh_storage.journal_writer.journal.objects)
    assert ("directory", with_manifest) in journal_objects

    listed_entries = list(swh_storage.directory_ls(dir_id))
    expected_entries = list(transform_entries(swh_storage, with_manifest))

    for entry in listed_entries:
        assert entry in expected_entries

    assert list(swh_storage.directory_missing([dir_id])) == []

    # TODO: check the recorded manifest
@settings( | @settings( | ||||
suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large] | suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large] | ||||
+ function_scoped_fixture_check, | + function_scoped_fixture_check, | ||||
) | ) | ||||
@given( | @given( | ||||
strategies.lists(hypothesis_strategies.directories(), min_size=1, max_size=10) | strategies.lists(hypothesis_strategies.directories(), min_size=1, max_size=10) | ||||
) | ) | ||||
def test_directory_add_get_arbitrary(self, swh_storage, directories): | def test_directory_add_get_arbitrary(self, swh_storage, directories): | ||||
swh_storage.directory_add(directories) | swh_storage.directory_add(directories) | ||||
for directory in directories: | for directory in directories: | ||||
if directory.raw_manifest is None: | |||||
assert swh_storage.directory_get_entries(directory.id) == PagedResult( | assert swh_storage.directory_get_entries(directory.id) == PagedResult( | ||||
results=list(directory.entries), next_page_token=None, | results=list(directory.entries), next_page_token=None, | ||||
) | ) | ||||
else: | |||||
# TODO: compare the manifests are the same (currently, we can't | |||||
# because there is no way to get the raw_manifest of a directory) | |||||
# we can't compare the other fields, because they become non-intrinsic, | |||||
# so they may clash between hypothesis runs | |||||
olasd: Do you have a (`swh.storage`) API extension in mind to retrieve this field? Something like `directory_get_manifest`? | |||||
vlorentz (Done): I don't like it very much because there is no way to paginate :/ But yes, that's the best option I can see. | |||||
pass | |||||
def test_directory_add_twice(self, swh_storage, sample_data): | def test_directory_add_twice(self, swh_storage, sample_data): | ||||
directory = sample_data.directories[1] | directory = sample_data.directories[1] | ||||
actual_result = swh_storage.directory_add([directory]) | actual_result = swh_storage.directory_add([directory]) | ||||
assert actual_result == {"directory:add": 1} | assert actual_result == {"directory:add": 1} | ||||
assert list(swh_storage.journal_writer.journal.objects) == [ | assert list(swh_storage.journal_writer.journal.objects) == [ | ||||
▲ Show 20 Lines • Show All 318 Lines • ▼ Show 20 Lines | def test_revision_add_fractional_timezone(self, swh_storage, sample_data): | ||||
assert list(end_missing) == [] | assert list(end_missing) == [] | ||||
assert list(swh_storage.journal_writer.journal.objects) == [ | assert list(swh_storage.journal_writer.journal.objects) == [ | ||||
("revision", revision) | ("revision", revision) | ||||
] | ] | ||||
assert swh_storage.revision_get([revision.id])[0] == revision | assert swh_storage.revision_get([revision.id])[0] == revision | ||||
def test_revision_add_with_raw_manifest(self, swh_storage, sample_data):
    """Add a revision that carries a ``raw_manifest`` and fetch it back.

    The sample revision is evolved with ``raw_manifest=b"foo"`` and its id is
    then replaced by the value of ``compute_hash()`` on the evolved object.
    The test checks the missing-before / present-after transition, that the
    journal holds exactly this revision, and that ``revision_get`` returns an
    object equal to the one inserted.
    """
    with_manifest = attr.evolve(sample_data.revision, raw_manifest=b"foo")
    # Re-derive the id only once the manifest is set on the object.
    with_manifest = attr.evolve(with_manifest, id=with_manifest.compute_hash())
    rev_id = with_manifest.id

    # Nothing has been inserted yet, so the id must be reported missing.
    assert list(swh_storage.revision_missing([rev_id])) == [rev_id]

    assert swh_storage.revision_add([with_manifest]) == {"revision:add": 1}

    assert list(swh_storage.revision_missing([rev_id])) == []

    journal_objects = list(swh_storage.journal_writer.journal.objects)
    assert journal_objects == [("revision", with_manifest)]

    assert swh_storage.revision_get([rev_id]) == [with_manifest]
@settings( | @settings( | ||||
suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large] | suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large] | ||||
+ function_scoped_fixture_check, | + function_scoped_fixture_check, | ||||
) | ) | ||||
@given( | @given( | ||||
strategies.lists(hypothesis_strategies.revisions(), min_size=1, max_size=10,) | strategies.lists(hypothesis_strategies.revisions(), min_size=1, max_size=10,) | ||||
) | ) | ||||
def test_revision_add_get_arbitrary(self, swh_storage, revisions): | def test_revision_add_get_arbitrary(self, swh_storage, revisions): | ||||
# remove non-intrinsic data, so revisions inserted with different hypothesis | # remove non-intrinsic data, so revisions inserted with different hypothesis | ||||
# data can't clash with each other | # data can't clash with each other | ||||
revisions = [ | revisions = [ | ||||
attr.evolve( | attr.evolve( | ||||
revision, | revision, | ||||
synthetic=False, | synthetic=False, | ||||
metadata=None, | metadata=None, | ||||
committer=attr.evolve(revision.committer, name=None, email=None), | committer=attr.evolve(revision.committer, name=None, email=None), | ||||
author=attr.evolve(revision.author, name=None, email=None), | author=attr.evolve(revision.author, name=None, email=None), | ||||
type=RevisionType.GIT, | |||||
) | ) | ||||
for revision in revisions | for revision in revisions | ||||
] | ] | ||||
swh_storage.revision_add(revisions) | swh_storage.revision_add(revisions) | ||||
revs = swh_storage.revision_get([revision.id for revision in revisions]) | for revision in revisions: | ||||
assert set(revs) == set(revisions) | (rev,) = swh_storage.revision_get([revision.id]) | ||||
if rev.raw_manifest is None: | |||||
assert rev == revision | |||||
else: | |||||
assert rev.raw_manifest == revision.raw_manifest | |||||
# we can't compare the other fields, because they become non-intrinsic, | |||||
olasd (Not Done): Shouldn't we expect all the fields to be passed back and forth unchanged by `swh.storage`, even if the raw_manifest is not None (i.e. check that the equality holds in all cases)? | |||||
vlorentz (Done): We should, but we can't until https://github.com/pytest-dev/pytest/issues/916 is solved. Imagine the following scenario: [the scenario's list items are missing from this capture; judging by the rest of the comment, two revisions built from the same raw_manifest but with different messages, msg1 and msg2, are inserted by successive Hypothesis runs]
Both have exactly the same id (because it was computed from the manifest rather than the fields), and the storage isn't reset between calls to test_revision_add_get_arbitrary by Hypothesis. So you don't know which revision is in the storage in the end, because it's left undefined by design (it's the one with msg1 when using a pg storage, and the one with msg2 with cass/in-mem). I could add a new test that doesn't rely on Hypothesis, though... | |||||
olasd (Not Done): Ugh, yeah, of course. I really think having such an explicit test would be useful though (assuming we always insert consistent entries, which should be true...) | |||||
# so they may clash between hypothesis runs | |||||
def test_revision_add_name_clash(self, swh_storage, sample_data): | def test_revision_add_name_clash(self, swh_storage, sample_data): | ||||
revision, revision2 = sample_data.revisions[:2] | revision, revision2 = sample_data.revisions[:2] | ||||
revision1 = attr.evolve( | revision1 = attr.evolve( | ||||
revision, | revision, | ||||
author=Person( | author=Person( | ||||
fullname=b"John Doe <john.doe@example.com>", | fullname=b"John Doe <john.doe@example.com>", | ||||
▲ Show 20 Lines • Show All 412 Lines • ▼ Show 20 Lines | def test_release_add(self, swh_storage, sample_data): | ||||
assert actual_result == {"release:add": 0} | assert actual_result == {"release:add": 0} | ||||
if isinstance(swh_storage, InMemoryStorage) or not isinstance( | if isinstance(swh_storage, InMemoryStorage) or not isinstance( | ||||
swh_storage, CassandraStorage | swh_storage, CassandraStorage | ||||
): | ): | ||||
swh_storage.refresh_stat_counters() | swh_storage.refresh_stat_counters() | ||||
assert swh_storage.stat_counters()["release"] == 2 | assert swh_storage.stat_counters()["release"] == 2 | ||||
def test_release_add_with_raw_manifest(self, swh_storage, sample_data):
    """Add a release that carries a ``raw_manifest`` and fetch it back.

    The first sample release is evolved with ``raw_manifest=b"foo"`` and its
    id is then replaced by the value of ``compute_hash()`` on the evolved
    object.  The test checks the missing-before / present-after transition,
    that the journal holds exactly this release, and that ``release_get``
    returns an object equal to the one inserted.
    """
    with_manifest = attr.evolve(sample_data.releases[0], raw_manifest=b"foo")
    # Re-derive the id only once the manifest is set on the object.
    with_manifest = attr.evolve(with_manifest, id=with_manifest.compute_hash())
    rel_id = with_manifest.id

    # Nothing has been inserted yet, so the id must be reported missing.
    assert list(swh_storage.release_missing([rel_id])) == [rel_id]

    assert swh_storage.release_add([with_manifest]) == {"release:add": 1}

    assert list(swh_storage.release_missing([rel_id])) == []

    journal_objects = list(swh_storage.journal_writer.journal.objects)
    assert journal_objects == [("release", with_manifest)]

    assert swh_storage.release_get([rel_id]) == [with_manifest]
@settings( | @settings( | ||||
suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large] | suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large] | ||||
+ function_scoped_fixture_check, | + function_scoped_fixture_check, | ||||
) | ) | ||||
@given(strategies.lists(hypothesis_strategies.releases(), min_size=1, max_size=10,)) | @given(strategies.lists(hypothesis_strategies.releases(), min_size=1, max_size=10,)) | ||||
def test_release_add_get_arbitrary(self, swh_storage, releases): | def test_release_add_get_arbitrary(self, swh_storage, releases): | ||||
# remove non-intrinsic data, so releases inserted with different hypothesis | # remove non-intrinsic data, so releases inserted with different hypothesis | ||||
# data can't clash with each other | # data can't clash with each other | ||||
releases = [ | releases = [ | ||||
attr.evolve( | attr.evolve( | ||||
release, | release, | ||||
synthetic=False, | synthetic=False, | ||||
metadata=None, | metadata=None, | ||||
author=attr.evolve(release.author, name=None, email=None) | author=attr.evolve(release.author, name=None, email=None) | ||||
if release.author | if release.author | ||||
else None, | else None, | ||||
) | ) | ||||
for release in releases | for release in releases | ||||
] | ] | ||||
swh_storage.release_add(releases) | swh_storage.release_add(releases) | ||||
assert set( | for release in releases: | ||||
swh_storage.release_get([release.id for release in releases]) | (rev,) = swh_storage.release_get([release.id]) | ||||
) == set(releases) | if rev.raw_manifest is None: | ||||
assert rev == release | |||||
else: | |||||
assert rev.raw_manifest == release.raw_manifest | |||||
# we can't compare the other fields, because they become non-intrinsic, | |||||
olasd (Not Done): Same question here. | |||||
# so they may clash between hypothesis runs | |||||
def test_release_add_no_author_date(self, swh_storage, sample_data): | def test_release_add_no_author_date(self, swh_storage, sample_data): | ||||
full_release = sample_data.release | full_release = sample_data.release | ||||
release = attr.evolve(full_release, author=None, date=None) | release = attr.evolve(full_release, author=None, date=None) | ||||
actual_result = swh_storage.release_add([release]) | actual_result = swh_storage.release_add([release]) | ||||
assert actual_result == {"release:add": 1} | assert actual_result == {"release:add": 1} | ||||
▲ Show 20 Lines • Show All 3,331 Lines • Show Last 20 Lines |
Do you have a (swh.storage) API extension in mind to retrieve this field? Something like directory_get_manifest?