Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/test_cassandra.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import namedtuple | |||||
import datetime | |||||
import os | import os | ||||
import signal | import signal | ||||
import socket | import socket | ||||
import subprocess | import subprocess | ||||
import time | import time | ||||
import pytest | import pytest | ||||
from swh.storage import get_storage | from swh.storage import get_storage | ||||
from swh.storage.cassandra import create_keyspace | from swh.storage.cassandra import create_keyspace | ||||
from swh.storage.cassandra.schema import TABLES | from swh.storage.cassandra.schema import TABLES, HASH_ALGORITHMS | ||||
from swh.storage.tests.test_storage import TestStorage as _TestStorage | from swh.storage.tests.test_storage import TestStorage as _TestStorage | ||||
from swh.storage.tests.test_storage import TestStorageGeneratedData \ | from swh.storage.tests.test_storage import TestStorageGeneratedData \ | ||||
as _TestStorageGeneratedData | as _TestStorageGeneratedData | ||||
from .storage_data import data | |||||
CONFIG_TEMPLATE = ''' | CONFIG_TEMPLATE = ''' | ||||
data_file_directories: | data_file_directories: | ||||
- {data_dir}/data | - {data_dir}/data | ||||
commitlog_directory: {data_dir}/commitlog | commitlog_directory: {data_dir}/commitlog | ||||
hints_directory: {data_dir}/hints | hints_directory: {data_dir}/hints | ||||
saved_caches_directory: {data_dir}/saved_caches | saved_caches_directory: {data_dir}/saved_caches | ||||
▲ Show 20 Lines • Show All 141 Lines • ▼ Show 20 Lines | def swh_storage_backend_config(cassandra_cluster, keyspace): | ||||
storage = get_storage(**storage_config) | storage = get_storage(**storage_config) | ||||
for table in TABLES: | for table in TABLES: | ||||
storage._cql_runner._session.execute('TRUNCATE TABLE "%s"' % table) | storage._cql_runner._session.execute('TRUNCATE TABLE "%s"' % table) | ||||
@pytest.mark.cassandra | @pytest.mark.cassandra | ||||
class TestCassandraStorage(_TestStorage): | class TestCassandraStorage(_TestStorage): | ||||
def test_content_add_murmur3_collision(self, swh_storage, mocker):
    """Check that ``content_add`` copes with a Murmur3 token collision.

    Index tables reference the main content table through a Murmur3
    token, so rows that merely share a token with the requested content
    must be discarded when the main table is read.

    Both CQL-runner lookups are replaced: every hash resolves to the same
    token, and that token always yields ``data.cont``, while the content
    actually being added is ``data.cont2``.
    """
    cql_runner = swh_storage.storage._cql_runner
    mock_calls = []  # one entry per invocation of either replacement

    def fake_tokens_from_single_hash(algo, hash_):
        # Whatever the hash is, answer with one fixed token.
        mock_calls.append('content_get_tokens_from_single_hash')
        assert algo in ('sha1', 'sha1_git')
        return [123456]

    mocker.patch.object(
        cql_runner,
        'content_get_tokens_from_single_hash',
        fake_tokens_from_single_hash)

    Row = namedtuple('Row', HASH_ALGORITHMS)

    def fake_rows_from_token(token):
        # Whatever the token is, answer with the hashes of data.cont.
        mock_calls.append('content_get_from_token')
        return [Row._make(data.cont[name] for name in Row._fields)]

    mocker.patch.object(
        cql_runner,
        'content_get_from_token',
        fake_rows_from_token)

    summary = swh_storage.content_add([data.cont2])

    # NOTE(review): presumably one token lookup plus one row fetch for
    # each of sha1 and sha1_git -- confirm against content_add.
    assert len(mock_calls) == 4
    assert summary == {
        'content:add': 1,
        'content:add:bytes': data.cont2['length'],
    }
def test_content_get_metadata_murmur3_collision(self, swh_storage, mocker):
    """Check that ``content_get_metadata`` drops colliding rows.

    Index tables reference the main content table through a Murmur3
    token, so rows that merely share a token with the requested content
    must be discarded when the main table is read.

    Both CQL-runner lookups are replaced: every hash resolves to the same
    token, and that token yields rows for both ``data.cont`` and
    ``data.cont2``.  Only ``data.cont`` is requested, so only its
    metadata may come back.
    """
    cql_runner = swh_storage.storage._cql_runner
    mock_calls = []  # one entry per invocation of either replacement

    # Metadata is the content without its raw bytes.
    expected_cont = dict(data.cont)
    expected_cont.pop('data')

    def fake_tokens_from_single_hash(algo, hash_):
        # Whatever the hash is, answer with one fixed token.
        mock_calls.append('content_get_tokens_from_single_hash')
        assert algo in ('sha1', 'sha1_git')
        return [123456]

    mocker.patch.object(
        cql_runner,
        'content_get_tokens_from_single_hash',
        fake_tokens_from_single_hash)

    # Rows carry every content column except the raw data, plus a ctime.
    cols = [key for key in data.cont if key != 'data']
    Row = namedtuple('Row', cols + ['ctime'])

    def fake_rows_from_token(token):
        # Whatever the token is, answer with data.cont and data.cont2.
        mock_calls.append('content_get_from_token')
        rows = []
        for cont in (data.cont, data.cont2):
            rows.append(Row._make([cont[col] for col in cols] + [42]))
        return rows

    mocker.patch.object(
        cql_runner,
        'content_get_from_token',
        fake_rows_from_token)

    actual_result = swh_storage.content_get_metadata([data.cont['sha1']])

    assert len(mock_calls) == 2

    # data.cont2 shares the token but not the hash: it must be absent.
    assert actual_result == {
        data.cont['sha1']: [expected_cont]
    }
def test_content_find_murmur3_collision(self, swh_storage, mocker):
    """Check that ``content_find`` drops colliding rows.

    Index tables reference the main content table through a Murmur3
    token, so rows that merely share a token with the requested content
    must be discarded when the main table is read.

    Both CQL-runner lookups are replaced: every hash resolves to the same
    token, and that token yields rows for both ``data.cont`` and
    ``data.cont2``.  The search is by the sha1 of ``data.cont``, so only
    that content may come back.
    """
    cql_runner = swh_storage.storage._cql_runner
    mock_calls = []  # one entry per invocation of either replacement

    # The expected result is the content without its raw bytes.
    expected_cont = dict(data.cont)
    expected_cont.pop('data')

    def fake_tokens_from_single_hash(algo, hash_):
        # Whatever the hash is, answer with one fixed token.
        mock_calls.append('content_get_tokens_from_single_hash')
        assert algo in ('sha1', 'sha1_git')
        return [123456]

    mocker.patch.object(
        cql_runner,
        'content_get_tokens_from_single_hash',
        fake_tokens_from_single_hash)

    # Rows carry every content column except the raw data, plus a ctime.
    cols = [key for key in data.cont if key != 'data']
    Row = namedtuple('Row', cols + ['ctime'])

    def fake_rows_from_token(token):
        # Whatever the token is, answer with data.cont and data.cont2.
        mock_calls.append('content_get_from_token')
        rows = []
        for cont in (data.cont, data.cont2):
            stamp = datetime.datetime.now()
            rows.append(Row._make([cont[col] for col in cols] + [stamp]))
        return rows

    mocker.patch.object(
        cql_runner,
        'content_get_from_token',
        fake_rows_from_token)

    actual_result = swh_storage.content_find({'sha1': data.cont['sha1']})

    assert len(mock_calls) == 2

    # ctime is generated at call time by the fake row source, so it is
    # removed before comparing against the expected content.
    del actual_result[0]['ctime']

    # data.cont2 shares the token but not the hash: it must be absent.
    assert actual_result == [expected_cont]
@pytest.mark.skip('content_update is not yet implemented for Cassandra')
def test_content_update(self):
    """Placeholder overriding the inherited test so that it is skipped."""
@pytest.mark.skip( | @pytest.mark.skip( | ||||
'The "person" table of the pgsql is a legacy thing, and not ' | 'The "person" table of the pgsql is a legacy thing, and not ' | ||||
'supported by the cassandra backend.') | 'supported by the cassandra backend.') | ||||
def test_person_fullname_unicity(self): | def test_person_fullname_unicity(self): | ||||
▲ Show 20 Lines • Show All 86 Lines • Show Last 20 Lines |