Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/test_cassandra.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import attr | import attr | ||||
import os | import os | ||||
import signal | import signal | ||||
import socket | import socket | ||||
import subprocess | import subprocess | ||||
import time | import time | ||||
from collections import namedtuple | from collections import namedtuple | ||||
from typing import Dict | |||||
import pytest | import pytest | ||||
from swh.core.api.classes import stream_results | |||||
from swh.storage import get_storage | from swh.storage import get_storage | ||||
from swh.storage.cassandra import create_keyspace | from swh.storage.cassandra import create_keyspace | ||||
from swh.storage.cassandra.schema import TABLES, HASH_ALGORITHMS | from swh.storage.cassandra.schema import TABLES, HASH_ALGORITHMS | ||||
from swh.storage.utils import now | from swh.storage.utils import now | ||||
from swh.storage.tests.test_storage import TestStorage as _TestStorage | from swh.storage.tests.test_storage import TestStorage as _TestStorage | ||||
from swh.storage.tests.test_storage import ( | from swh.storage.tests.test_storage import ( | ||||
TestStorageGeneratedData as _TestStorageGeneratedData, | TestStorageGeneratedData as _TestStorageGeneratedData, | ||||
▲ Show 20 Lines • Show All 292 Lines • ▼ Show 20 Lines | def test_content_find_murmur3_collision(self, swh_storage, mocker, sample_data): | ||||
actual_result = swh_storage.content_find({"sha1": cont.sha1}) | actual_result = swh_storage.content_find({"sha1": cont.sha1}) | ||||
assert called == 2 | assert called == 2 | ||||
# but cont2 should be filtered out | # but cont2 should be filtered out | ||||
assert actual_result == [expected_content] | assert actual_result == [expected_content] | ||||
def test_content_get_partition_murmur3_collision(
    self, swh_storage, mocker, sample_data
):
    """Check that content_get_partition lists every row, collisions included.

    The Murmur3 token links the index tables to the main table, and
    non-matching contents whose murmur3 hash collides are filtered out when
    the main table is read.

    Here the token-range query is mocked so that every row comes back several
    times; content_get_partition is expected to return all of them, even the
    collisions.
    """
    call_count = 0

    # One row per sample content, keyed by a fake token (the enumeration index).
    rows: Dict[int, Dict] = {}
    for token, content in enumerate(sample_data.contents):
        without_data = attr.evolve(content, data=None)
        rows[token] = {**without_data.to_dict(), "tok": token}

    # Row fields: the token plus every content field except "data".
    field_names = ({"tok"} | set(content.to_dict())) - {"data"}
    Row = namedtuple("Row", field_names)

    def mock_content_get_token_range(range_start, range_end, limit):
        nonlocal call_count
        call_count += 1
        # Whatever the requested range, emit the full row set three times so
        # that each token shows up repeatedly.
        for _ in range(3):
            for row in rows.values():
                yield Row(**row)

    mocker.patch.object(
        swh_storage._cql_runner,
        "content_get_token_range",
        mock_content_get_token_range,
    )

    # The partition arguments are not really relevant here: the mock always
    # returns everything.
    listed = list(
        stream_results(
            swh_storage.content_get_partition, partition_id=0, nb_partitions=1
        )
    )

    assert call_count > 0

    expected_count = len(sample_data.contents)
    # everything is listed, even collisions
    assert len(listed) == 3 * expected_count
    # the mock tripled each row, so dropping duplicates must give back the
    # original number of contents
    assert len(set(listed)) == expected_count
@pytest.mark.skip("content_update is not yet implemented for Cassandra")
def test_content_update(self):
    """Skipped placeholder: content_update is not yet implemented for Cassandra."""
@pytest.mark.skip( | @pytest.mark.skip( | ||||
'The "person" table of the pgsql is a legacy thing, and not ' | 'The "person" table of the pgsql is a legacy thing, and not ' | ||||
"supported by the cassandra backend." | "supported by the cassandra backend." | ||||
) | ) | ||||
Show All 32 Lines |
Rename the variable to "rows", since tokens are only the keys, not the values.