Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/test_storage.py
Show All 20 Lines | |||||
import pytest | import pytest | ||||
from hypothesis import given, strategies, settings, HealthCheck | from hypothesis import given, strategies, settings, HealthCheck | ||||
from typing import ClassVar, Optional | from typing import ClassVar, Optional | ||||
from swh.model import from_disk, identifiers | from swh.model import from_disk, identifiers | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import Release, Revision | |||||
from swh.model.hypothesis_strategies import objects | from swh.model.hypothesis_strategies import objects | ||||
from swh.storage import HashCollision | from swh.storage import HashCollision, get_storage | ||||
from swh.storage.converters import origin_url_to_sha1 as sha1 | from swh.storage.converters import origin_url_to_sha1 as sha1 | ||||
from swh.storage.exc import StorageArgumentException | from swh.storage.exc import StorageArgumentException | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
from .storage_data import data | from .storage_data import data | ||||
@contextmanager | @contextmanager | ||||
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | class TestStorage: | ||||
test_remote_storage.py. | test_remote_storage.py. | ||||
We need to have the two classes inherit from this base class | We need to have the two classes inherit from this base class | ||||
separately to avoid nosetests running the tests from the base | separately to avoid nosetests running the tests from the base | ||||
class twice. | class twice. | ||||
""" | """ | ||||
maxDiff = None # type: ClassVar[Optional[int]] | maxDiff = None # type: ClassVar[Optional[int]] | ||||
def test_types(self, swh_storage): | def test_types(self, swh_storage_backend_config): | ||||
"""Checks all methods of StorageInterface are implemented by this | """Checks all methods of StorageInterface are implemented by this | ||||
backend, and that they have the same signature.""" | backend, and that they have the same signature.""" | ||||
# Create an instance of the protocol (which cannot be instantiated | # Create an instance of the protocol (which cannot be instantiated | ||||
# directly, so this creates a subclass, then instantiates it) | # directly, so this creates a subclass, then instantiates it) | ||||
interface = type('_', (StorageInterface,), {})() | interface = type('_', (StorageInterface,), {})() | ||||
storage = get_storage(**swh_storage_backend_config) | |||||
assert 'content_add' in dir(interface) | assert 'content_add' in dir(interface) | ||||
missing_methods = [] | missing_methods = [] | ||||
for meth_name in dir(interface): | for meth_name in dir(interface): | ||||
if meth_name.startswith('_'): | if meth_name.startswith('_'): | ||||
continue | continue | ||||
interface_meth = getattr(interface, meth_name) | interface_meth = getattr(interface, meth_name) | ||||
try: | try: | ||||
concrete_meth = getattr(swh_storage, meth_name) | concrete_meth = getattr(storage, meth_name) | ||||
except AttributeError: | except AttributeError: | ||||
if not getattr(interface_meth, 'deprecated_endpoint', False): | if not getattr(interface_meth, 'deprecated_endpoint', False): | ||||
# The backend is missing a (non-deprecated) endpoint | # The backend is missing a (non-deprecated) endpoint | ||||
missing_methods.append(meth_name) | missing_methods.append(meth_name) | ||||
continue | continue | ||||
expected_signature = inspect.signature(interface_meth) | expected_signature = inspect.signature(interface_meth) | ||||
actual_signature = inspect.signature(concrete_meth) | actual_signature = inspect.signature(concrete_meth) | ||||
▲ Show 20 Lines • Show All 135 Lines • ▼ Show 20 Lines | def test_content_add_collision(self, swh_storage): | ||||
cont1b['sha256'] = bytes(sha256_array) | cont1b['sha256'] = bytes(sha256_array) | ||||
with pytest.raises(HashCollision) as cm: | with pytest.raises(HashCollision) as cm: | ||||
swh_storage.content_add([cont1, cont1b]) | swh_storage.content_add([cont1, cont1b]) | ||||
assert cm.value.args[0] in ['sha1', 'sha1_git', 'blake2s256'] | assert cm.value.args[0] in ['sha1', 'sha1_git', 'blake2s256'] | ||||
def test_content_update(self, swh_storage): | def test_content_update(self, swh_storage): | ||||
swh_storage.journal_writer = None # TODO, not supported | if hasattr(swh_storage, 'storage'): | ||||
swh_storage.storage.journal_writer = None # TODO, not supported | |||||
cont = copy.deepcopy(data.cont) | cont = copy.deepcopy(data.cont) | ||||
swh_storage.content_add([cont]) | swh_storage.content_add([cont]) | ||||
# alter the sha1_git for example | # alter the sha1_git for example | ||||
cont['sha1_git'] = hash_to_bytes( | cont['sha1_git'] = hash_to_bytes( | ||||
'3a60a5275d0333bf13468e8b3dcab90f4046e654') | '3a60a5275d0333bf13468e8b3dcab90f4046e654') | ||||
▲ Show 20 Lines • Show All 252 Lines • ▼ Show 20 Lines | def test_content_get_metadata_missing_sha1(self, swh_storage): | ||||
swh_storage.content_add([cont1, cont2]) | swh_storage.content_add([cont1, cont2]) | ||||
actual_contents = swh_storage.content_get_metadata( | actual_contents = swh_storage.content_get_metadata( | ||||
[missing_cont['sha1']]) | [missing_cont['sha1']]) | ||||
assert actual_contents == {missing_cont['sha1']: []} | assert actual_contents == {missing_cont['sha1']: []} | ||||
def test_content_get_random(self, swh_storage): | def test_content_get_random(self, swh_storage): | ||||
print(data.cont, data.cont2, data.cont3) | |||||
swh_storage.content_add([data.cont, data.cont2, data.cont3]) | swh_storage.content_add([data.cont, data.cont2, data.cont3]) | ||||
assert swh_storage.content_get_random() in { | assert swh_storage.content_get_random() in { | ||||
data.cont['sha1_git'], data.cont2['sha1_git'], | data.cont['sha1_git'], data.cont2['sha1_git'], | ||||
data.cont3['sha1_git']} | data.cont3['sha1_git']} | ||||
def test_directory_add(self, swh_storage): | def test_directory_add(self, swh_storage): | ||||
init_missing = list(swh_storage.directory_missing([data.dir['id']])) | init_missing = list(swh_storage.directory_missing([data.dir['id']])) | ||||
▲ Show 20 Lines • Show All 211 Lines • ▼ Show 20 Lines | def test_revision_add(self, swh_storage): | ||||
assert list(init_missing) == [data.revision['id']] | assert list(init_missing) == [data.revision['id']] | ||||
actual_result = swh_storage.revision_add([data.revision]) | actual_result = swh_storage.revision_add([data.revision]) | ||||
assert actual_result == {'revision:add': 1} | assert actual_result == {'revision:add': 1} | ||||
end_missing = swh_storage.revision_missing([data.revision['id']]) | end_missing = swh_storage.revision_missing([data.revision['id']]) | ||||
assert list(end_missing) == [] | assert list(end_missing) == [] | ||||
normalized_revision = Revision.from_dict(data.revision).to_dict() | |||||
assert list(swh_storage.journal_writer.objects) \ | assert list(swh_storage.journal_writer.objects) \ | ||||
== [('revision', data.revision)] | == [('revision', normalized_revision)] | ||||
# already there so nothing added | # already there so nothing added | ||||
actual_result = swh_storage.revision_add([data.revision]) | actual_result = swh_storage.revision_add([data.revision]) | ||||
assert actual_result == {'revision:add': 0} | assert actual_result == {'revision:add': 0} | ||||
swh_storage.refresh_stat_counters() | swh_storage.refresh_stat_counters() | ||||
assert swh_storage.stat_counters()['revision'] == 1 | assert swh_storage.stat_counters()['revision'] == 1 | ||||
Show All 37 Lines | def test_revision_add_validation(self, swh_storage): | ||||
if type(cm.value) == psycopg2.DataError: | if type(cm.value) == psycopg2.DataError: | ||||
assert cm.value.pgcode == \ | assert cm.value.pgcode == \ | ||||
psycopg2.errorcodes.INVALID_TEXT_REPRESENTATION | psycopg2.errorcodes.INVALID_TEXT_REPRESENTATION | ||||
def test_revision_add_twice(self, swh_storage): | def test_revision_add_twice(self, swh_storage): | ||||
actual_result = swh_storage.revision_add([data.revision]) | actual_result = swh_storage.revision_add([data.revision]) | ||||
assert actual_result == {'revision:add': 1} | assert actual_result == {'revision:add': 1} | ||||
normalized_revision = Revision.from_dict(data.revision).to_dict() | |||||
normalized_revision2 = Revision.from_dict(data.revision2).to_dict() | |||||
assert list(swh_storage.journal_writer.objects) \ | assert list(swh_storage.journal_writer.objects) \ | ||||
== [('revision', data.revision)] | == [('revision', normalized_revision)] | ||||
actual_result = swh_storage.revision_add( | actual_result = swh_storage.revision_add( | ||||
[data.revision, data.revision2]) | [data.revision, data.revision2]) | ||||
assert actual_result == {'revision:add': 1} | assert actual_result == {'revision:add': 1} | ||||
assert list(swh_storage.journal_writer.objects) \ | assert list(swh_storage.journal_writer.objects) \ | ||||
== [('revision', data.revision), | == [('revision', normalized_revision), | ||||
('revision', data.revision2)] | ('revision', normalized_revision2)] | ||||
def test_revision_add_name_clash(self, swh_storage): | def test_revision_add_name_clash(self, swh_storage): | ||||
revision1 = data.revision | revision1 = data.revision | ||||
revision2 = data.revision2 | revision2 = data.revision2 | ||||
revision1['author'] = { | revision1['author'] = { | ||||
'fullname': b'John Doe <john.doe@example.com>', | 'fullname': b'John Doe <john.doe@example.com>', | ||||
'name': b'John Doe', | 'name': b'John Doe', | ||||
Show All 23 Lines | def test_revision_log(self, swh_storage): | ||||
del actual_result['author']['id'] | del actual_result['author']['id'] | ||||
if 'id' in actual_result['committer']: | if 'id' in actual_result['committer']: | ||||
del actual_result['committer']['id'] | del actual_result['committer']['id'] | ||||
assert len(actual_results) == 2 # rev4 -child-> rev3 | assert len(actual_results) == 2 # rev4 -child-> rev3 | ||||
assert actual_results[0] == normalize_entity(data.revision4) | assert actual_results[0] == normalize_entity(data.revision4) | ||||
assert actual_results[1] == normalize_entity(data.revision3) | assert actual_results[1] == normalize_entity(data.revision3) | ||||
normalized_revision3 = Revision.from_dict(data.revision3).to_dict() | |||||
normalized_revision4 = Revision.from_dict(data.revision4).to_dict() | |||||
assert list(swh_storage.journal_writer.objects) == [ | assert list(swh_storage.journal_writer.objects) == [ | ||||
('revision', data.revision3), | ('revision', normalized_revision3), | ||||
('revision', data.revision4)] | ('revision', normalized_revision4)] | ||||
def test_revision_log_with_limit(self, swh_storage): | def test_revision_log_with_limit(self, swh_storage): | ||||
# given | # given | ||||
# data.revision4 -is-child-of-> data.revision3 | # data.revision4 -is-child-of-> data.revision3 | ||||
swh_storage.revision_add([data.revision3, | swh_storage.revision_add([data.revision3, | ||||
data.revision4]) | data.revision4]) | ||||
actual_results = list(swh_storage.revision_log( | actual_results = list(swh_storage.revision_log( | ||||
[data.revision4['id']], 1)) | [data.revision4['id']], 1)) | ||||
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | class TestStorage: | ||||
def test_revision_get_random(self, swh_storage): | def test_revision_get_random(self, swh_storage): | ||||
swh_storage.revision_add( | swh_storage.revision_add( | ||||
[data.revision, data.revision2, data.revision3]) | [data.revision, data.revision2, data.revision3]) | ||||
assert swh_storage.revision_get_random() in \ | assert swh_storage.revision_get_random() in \ | ||||
{data.revision['id'], data.revision2['id'], data.revision3['id']} | {data.revision['id'], data.revision2['id'], data.revision3['id']} | ||||
def test_release_add(self, swh_storage): | def test_release_add(self, swh_storage): | ||||
normalized_release = Release.from_dict(data.release).to_dict() | |||||
normalized_release2 = Release.from_dict(data.release2).to_dict() | |||||
init_missing = swh_storage.release_missing([data.release['id'], | init_missing = swh_storage.release_missing([data.release['id'], | ||||
data.release2['id']]) | data.release2['id']]) | ||||
assert [data.release['id'], data.release2['id']] == list(init_missing) | assert [data.release['id'], data.release2['id']] == list(init_missing) | ||||
actual_result = swh_storage.release_add([data.release, data.release2]) | actual_result = swh_storage.release_add([data.release, data.release2]) | ||||
assert actual_result == {'release:add': 2} | assert actual_result == {'release:add': 2} | ||||
end_missing = swh_storage.release_missing([data.release['id'], | end_missing = swh_storage.release_missing([data.release['id'], | ||||
data.release2['id']]) | data.release2['id']]) | ||||
assert list(end_missing) == [] | assert list(end_missing) == [] | ||||
assert list(swh_storage.journal_writer.objects) == [ | assert list(swh_storage.journal_writer.objects) == [ | ||||
('release', data.release), | ('release', normalized_release), | ||||
('release', data.release2)] | ('release', normalized_release2)] | ||||
# already present so nothing added | # already present so nothing added | ||||
actual_result = swh_storage.release_add([data.release, data.release2]) | actual_result = swh_storage.release_add([data.release, data.release2]) | ||||
assert actual_result == {'release:add': 0} | assert actual_result == {'release:add': 0} | ||||
swh_storage.refresh_stat_counters() | swh_storage.refresh_stat_counters() | ||||
assert swh_storage.stat_counters()['release'] == 2 | assert swh_storage.stat_counters()['release'] == 2 | ||||
def test_release_add_from_generator(self, swh_storage): | def test_release_add_from_generator(self, swh_storage): | ||||
def _rel_gen(): | def _rel_gen(): | ||||
yield data.release | yield data.release | ||||
yield data.release2 | yield data.release2 | ||||
normalized_release = Release.from_dict(data.release).to_dict() | |||||
normalized_release2 = Release.from_dict(data.release2).to_dict() | |||||
actual_result = swh_storage.release_add(_rel_gen()) | actual_result = swh_storage.release_add(_rel_gen()) | ||||
assert actual_result == {'release:add': 2} | assert actual_result == {'release:add': 2} | ||||
assert list(swh_storage.journal_writer.objects) == [ | assert list(swh_storage.journal_writer.objects) == [ | ||||
('release', data.release), | ('release', normalized_release), | ||||
('release', data.release2)] | ('release', normalized_release2)] | ||||
swh_storage.refresh_stat_counters() | swh_storage.refresh_stat_counters() | ||||
assert swh_storage.stat_counters()['release'] == 2 | assert swh_storage.stat_counters()['release'] == 2 | ||||
def test_release_add_no_author_date(self, swh_storage): | def test_release_add_no_author_date(self, swh_storage): | ||||
release = data.release | release = data.release | ||||
release['author'] = None | release['author'] = None | ||||
Show All 27 Lines | def test_release_add_validation(self, swh_storage): | ||||
if type(cm.value) == psycopg2.IntegrityError: | if type(cm.value) == psycopg2.IntegrityError: | ||||
assert cm.value.pgcode == psycopg2.errorcodes.CHECK_VIOLATION | assert cm.value.pgcode == psycopg2.errorcodes.CHECK_VIOLATION | ||||
def test_release_add_twice(self, swh_storage): | def test_release_add_twice(self, swh_storage): | ||||
actual_result = swh_storage.release_add([data.release]) | actual_result = swh_storage.release_add([data.release]) | ||||
assert actual_result == {'release:add': 1} | assert actual_result == {'release:add': 1} | ||||
normalized_release = Release.from_dict(data.release).to_dict() | |||||
normalized_release2 = Release.from_dict(data.release2).to_dict() | |||||
assert list(swh_storage.journal_writer.objects) \ | assert list(swh_storage.journal_writer.objects) \ | ||||
== [('release', data.release)] | == [('release', normalized_release)] | ||||
actual_result = swh_storage.release_add([data.release, data.release2]) | actual_result = swh_storage.release_add([data.release, data.release2]) | ||||
assert actual_result == {'release:add': 1} | assert actual_result == {'release:add': 1} | ||||
assert list(swh_storage.journal_writer.objects) \ | assert list(swh_storage.journal_writer.objects) \ | ||||
== [('release', data.release), | == [('release', normalized_release), | ||||
('release', data.release2)] | ('release', normalized_release2)] | ||||
def test_release_add_name_clash(self, swh_storage): | def test_release_add_name_clash(self, swh_storage): | ||||
release1 = data.release.copy() | release1 = data.release.copy() | ||||
release2 = data.release2.copy() | release2 = data.release2.copy() | ||||
release1['author'] = { | release1['author'] = { | ||||
'fullname': b'John Doe <john.doe@example.com>', | 'fullname': b'John Doe <john.doe@example.com>', | ||||
'name': b'John Doe', | 'name': b'John Doe', | ||||
▲ Show 20 Lines • Show All 2,634 Lines • ▼ Show 20 Lines | class TestPgStorage: | ||||
"""This class is dedicated for the rare case where the schema needs to | """This class is dedicated for the rare case where the schema needs to | ||||
be altered dynamically. | be altered dynamically. | ||||
Otherwise, the tests could be blocking when run all together. | Otherwise, the tests could be blocking when run all together. | ||||
""" | """ | ||||
def test_content_update_with_new_cols(self, swh_storage): | def test_content_update_with_new_cols(self, swh_storage): | ||||
swh_storage.journal_writer = None # TODO, not supported | swh_storage.storage.journal_writer = None # TODO, not supported | ||||
with db_transaction(swh_storage) as (_, cur): | with db_transaction(swh_storage) as (_, cur): | ||||
cur.execute("""alter table content | cur.execute("""alter table content | ||||
add column test text default null, | add column test text default null, | ||||
add column test2 text default null""") | add column test2 text default null""") | ||||
cont = copy.deepcopy(data.cont2) | cont = copy.deepcopy(data.cont2) | ||||
swh_storage.content_add([cont]) | swh_storage.content_add([cont]) | ||||
▲ Show 20 Lines • Show All 98 Lines • Show Last 20 Lines |