Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/test_cassandra.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import attr | import datetime | ||||
import os | import os | ||||
import signal | import signal | ||||
import socket | import socket | ||||
import subprocess | import subprocess | ||||
import time | import time | ||||
from collections import namedtuple | |||||
from typing import Dict | from typing import Dict | ||||
import attr | |||||
import pytest | import pytest | ||||
from swh.core.api.classes import stream_results | from swh.core.api.classes import stream_results | ||||
from swh.storage import get_storage | from swh.storage import get_storage | ||||
from swh.storage.cassandra import create_keyspace | from swh.storage.cassandra import create_keyspace | ||||
from swh.storage.cassandra.schema import TABLES, HASH_ALGORITHMS | from swh.storage.cassandra.schema import TABLES, HASH_ALGORITHMS | ||||
from swh.storage.cassandra.model import ContentRow | |||||
from swh.storage.utils import now | from swh.storage.utils import now | ||||
from swh.storage.tests.test_storage import TestStorage as _TestStorage | from swh.storage.tests.test_storage import TestStorage as _TestStorage | ||||
from swh.storage.tests.test_storage import ( | from swh.storage.tests.test_storage import ( | ||||
TestStorageGeneratedData as _TestStorageGeneratedData, | TestStorageGeneratedData as _TestStorageGeneratedData, | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 174 Lines • ▼ Show 20 Lines | def test_content_add_murmur3_collision(self, swh_storage, mocker, sample_data): | ||||
assert algo in ("sha1", "sha1_git") | assert algo in ("sha1", "sha1_git") | ||||
return [123456] | return [123456] | ||||
mocker.patch.object( | mocker.patch.object( | ||||
swh_storage._cql_runner, "content_get_tokens_from_single_hash", mock_cgtfsh, | swh_storage._cql_runner, "content_get_tokens_from_single_hash", mock_cgtfsh, | ||||
) | ) | ||||
# For all tokens, always return cont | # For all tokens, always return cont | ||||
Row = namedtuple("Row", HASH_ALGORITHMS) | |||||
def mock_cgft(token): | def mock_cgft(token): | ||||
nonlocal called | nonlocal called | ||||
called += 1 | called += 1 | ||||
return [Row(**{algo: getattr(cont, algo) for algo in HASH_ALGORITHMS})] | return [ | ||||
ContentRow( | |||||
length=10, | |||||
ctime=datetime.datetime.now(), | |||||
status="present", | |||||
**{algo: getattr(cont, algo) for algo in HASH_ALGORITHMS}, | |||||
) | |||||
] | |||||
mocker.patch.object( | mocker.patch.object( | ||||
swh_storage._cql_runner, "content_get_from_token", mock_cgft | swh_storage._cql_runner, "content_get_from_token", mock_cgft | ||||
) | ) | ||||
actual_result = swh_storage.content_add([cont2]) | actual_result = swh_storage.content_add([cont2]) | ||||
assert called == 4 | assert called == 4 | ||||
Show All 22 Lines | ): | ||||
return [123456] | return [123456] | ||||
mocker.patch.object( | mocker.patch.object( | ||||
swh_storage._cql_runner, "content_get_tokens_from_single_hash", mock_cgtfsh, | swh_storage._cql_runner, "content_get_tokens_from_single_hash", mock_cgtfsh, | ||||
) | ) | ||||
# For all tokens, always return cont and cont2 | # For all tokens, always return cont and cont2 | ||||
cols = list(set(cont.to_dict()) - {"data"}) | cols = list(set(cont.to_dict()) - {"data"}) | ||||
Row = namedtuple("Row", cols) | |||||
def mock_cgft(token): | def mock_cgft(token): | ||||
nonlocal called | nonlocal called | ||||
called += 1 | called += 1 | ||||
return [ | return [ | ||||
Row(**{col: getattr(cont, col) for col in cols}) | ContentRow(**{col: getattr(cont, col) for col in cols},) | ||||
for cont in [cont, cont2] | for cont in [cont, cont2] | ||||
] | ] | ||||
mocker.patch.object( | mocker.patch.object( | ||||
swh_storage._cql_runner, "content_get_from_token", mock_cgft | swh_storage._cql_runner, "content_get_from_token", mock_cgft | ||||
) | ) | ||||
actual_result = swh_storage.content_get([cont.sha1]) | actual_result = swh_storage.content_get([cont.sha1]) | ||||
Show All 23 Lines | def test_content_find_murmur3_collision(self, swh_storage, mocker, sample_data): | ||||
return [123456] | return [123456] | ||||
mocker.patch.object( | mocker.patch.object( | ||||
swh_storage._cql_runner, "content_get_tokens_from_single_hash", mock_cgtfsh, | swh_storage._cql_runner, "content_get_tokens_from_single_hash", mock_cgtfsh, | ||||
) | ) | ||||
# For all tokens, always return cont and cont2 | # For all tokens, always return cont and cont2 | ||||
cols = list(set(cont.to_dict()) - {"data"}) | cols = list(set(cont.to_dict()) - {"data"}) | ||||
Row = namedtuple("Row", cols) | |||||
def mock_cgft(token): | def mock_cgft(token): | ||||
nonlocal called | nonlocal called | ||||
called += 1 | called += 1 | ||||
return [ | return [ | ||||
Row(**{col: getattr(cont, col) for col in cols}) | ContentRow(**{col: getattr(cont, col) for col in cols}) | ||||
for cont in [cont, cont2] | for cont in [cont, cont2] | ||||
] | ] | ||||
mocker.patch.object( | mocker.patch.object( | ||||
swh_storage._cql_runner, "content_get_from_token", mock_cgft | swh_storage._cql_runner, "content_get_from_token", mock_cgft | ||||
) | ) | ||||
expected_content = attr.evolve(cont, data=None) | expected_content = attr.evolve(cont, data=None) | ||||
Show All 20 Lines | ): | ||||
rows: Dict[int, Dict] = {} | rows: Dict[int, Dict] = {} | ||||
for tok, content in enumerate(sample_data.contents): | for tok, content in enumerate(sample_data.contents): | ||||
cont = attr.evolve(content, data=None, ctime=now()) | cont = attr.evolve(content, data=None, ctime=now()) | ||||
row_d = {**cont.to_dict(), "tok": tok} | row_d = {**cont.to_dict(), "tok": tok} | ||||
rows[tok] = row_d | rows[tok] = row_d | ||||
# For all tokens, always return cont | # For all tokens, always return cont | ||||
keys = set(["tok"] + list(content.to_dict().keys())).difference(set(["data"])) | |||||
Row = namedtuple("Row", keys) | |||||
def mock_content_get_token_range(range_start, range_end, limit): | def mock_content_get_token_range(range_start, range_end, limit): | ||||
nonlocal called | nonlocal called | ||||
called += 1 | called += 1 | ||||
for tok in list(rows.keys()) * 3: # yield multiple times the same tok | for tok in list(rows.keys()) * 3: # yield multiple times the same tok | ||||
row_d = rows[tok] | row_d = dict(rows[tok].items()) | ||||
yield Row(**row_d) | row_d.pop("tok") | ||||
yield (tok, ContentRow(**row_d)) | |||||
mocker.patch.object( | mocker.patch.object( | ||||
swh_storage._cql_runner, | swh_storage._cql_runner, | ||||
"content_get_token_range", | "content_get_token_range", | ||||
mock_content_get_token_range, | mock_content_get_token_range, | ||||
) | ) | ||||
actual_results = list( | actual_results = list( | ||||
▲ Show 20 Lines • Show All 53 Lines • Show Last 20 Lines |