Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124280
D3554.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D3554.diff
View Options
diff --git a/swh/storage/tests/test_cassandra.py b/swh/storage/tests/test_cassandra.py
--- a/swh/storage/tests/test_cassandra.py
+++ b/swh/storage/tests/test_cassandra.py
@@ -1,29 +1,29 @@
-# Copyright (C) 2018-2019 The Software Heritage developers
+# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from collections import namedtuple
-import datetime
+import attr
import os
import signal
import socket
import subprocess
import time
+from collections import namedtuple
+
import pytest
from swh.storage import get_storage
from swh.storage.cassandra import create_keyspace
from swh.storage.cassandra.schema import TABLES, HASH_ALGORITHMS
+from swh.storage.utils import now
from swh.storage.tests.test_storage import TestStorage as _TestStorage
from swh.storage.tests.test_storage import (
TestStorageGeneratedData as _TestStorageGeneratedData,
)
-from .storage_data import data
-
CONFIG_TEMPLATE = """
data_file_directories:
@@ -185,7 +185,9 @@
@pytest.mark.cassandra
class TestCassandraStorage(_TestStorage):
- def test_content_add_murmur3_collision(self, swh_storage, mocker):
+ def test_content_add_murmur3_collision(
+ self, swh_storage, mocker, sample_data_model
+ ):
"""The Murmur3 token is used as link from index tables to the main
table; and non-matching contents with colliding murmur3-hash
are filtered-out when reading the main table.
@@ -193,6 +195,8 @@
"""
called = 0
+ cont, cont2 = sample_data_model["content"][:2]
+
# always return a token
def mock_cgtfsh(algo, hash_):
nonlocal called
@@ -206,34 +210,40 @@
mock_cgtfsh,
)
- # For all tokens, always return data.cont
+ # For all tokens, always return cont
Row = namedtuple("Row", HASH_ALGORITHMS)
def mock_cgft(token):
nonlocal called
called += 1
- return [Row(**{algo: data.cont[algo] for algo in HASH_ALGORITHMS})]
+ return [Row(**{algo: getattr(cont, algo) for algo in HASH_ALGORITHMS})]
mocker.patch.object(
swh_storage.storage._cql_runner, "content_get_from_token", mock_cgft
)
- actual_result = swh_storage.content_add([data.cont2])
+ actual_result = swh_storage.content_add([cont2])
assert called == 4
assert actual_result == {
"content:add": 1,
- "content:add:bytes": data.cont2["length"],
+ "content:add:bytes": cont2.length,
}
- def test_content_get_metadata_murmur3_collision(self, swh_storage, mocker):
+ def test_content_get_metadata_murmur3_collision(
+ self, swh_storage, mocker, sample_data_model
+ ):
"""The Murmur3 token is used as link from index tables to the main
table; and non-matching contents with colliding murmur3-hash
are filtered-out when reading the main table.
- This test checks the content methods do filter out these collision.
+ This test checks the content methods do filter out these collisions.
"""
called = 0
+ cont, cont2 = [
+ attr.evolve(c, ctime=now()) for c in sample_data_model["content"][:2]
+ ]
+
# always return a token
def mock_cgtfsh(algo, hash_):
nonlocal called
@@ -247,40 +257,47 @@
mock_cgtfsh,
)
- # For all tokens, always return data.cont and data.cont2
- cols = list(set(data.cont) - {"data"})
- Row = namedtuple("Row", cols + ["ctime"])
+ # For all tokens, always return cont and cont2
+ cols = list(set(cont.to_dict()) - {"data"})
+ Row = namedtuple("Row", cols)
def mock_cgft(token):
nonlocal called
called += 1
return [
- Row(ctime=42, **{col: cont[col] for col in cols})
- for cont in [data.cont, data.cont2]
+ Row(**{col: getattr(cont, col) for col in cols})
+ for cont in [cont, cont2]
]
mocker.patch.object(
swh_storage.storage._cql_runner, "content_get_from_token", mock_cgft
)
- expected_cont = data.cont.copy()
- del expected_cont["data"]
-
- actual_result = swh_storage.content_get_metadata([data.cont["sha1"]])
+ actual_result = swh_storage.content_get_metadata([cont.sha1])
assert called == 2
- # but data.cont2 should be filtered out
- assert actual_result == {data.cont["sha1"]: [expected_cont]}
+ # dropping extra column not returned
+ expected_cont = attr.evolve(cont, data=None, ctime=None).to_dict()
+ del expected_cont["ctime"] # forced to pop it as to_dict does not
+
+ # but cont2 should be filtered out
+ assert actual_result == {cont.sha1: [expected_cont]}
- def test_content_find_murmur3_collision(self, swh_storage, mocker):
+ def test_content_find_murmur3_collision(
+ self, swh_storage, mocker, sample_data_model
+ ):
"""The Murmur3 token is used as link from index tables to the main
table; and non-matching contents with colliding murmur3-hash
are filtered-out when reading the main table.
- This test checks the content methods do filter out these collision.
+ This test checks the content methods do filter out these collisions.
"""
called = 0
+ cont, cont2 = [
+ attr.evolve(c, ctime=now()) for c in sample_data_model["content"][:2]
+ ]
+
# always return a token
def mock_cgtfsh(algo, hash_):
nonlocal called
@@ -294,31 +311,29 @@
mock_cgtfsh,
)
- # For all tokens, always return data.cont and data.cont2
- cols = list(set(data.cont) - {"data"})
- Row = namedtuple("Row", cols + ["ctime"])
+ # For all tokens, always return cont and cont2
+ cols = list(set(cont.to_dict()) - {"data"})
+ Row = namedtuple("Row", cols)
def mock_cgft(token):
nonlocal called
called += 1
return [
- Row(ctime=datetime.datetime.now(), **{col: cont[col] for col in cols})
- for cont in [data.cont, data.cont2]
+ Row(**{col: getattr(cont, col) for col in cols})
+ for cont in [cont, cont2]
]
mocker.patch.object(
swh_storage.storage._cql_runner, "content_get_from_token", mock_cgft
)
- expected_cont = data.cont.copy()
- del expected_cont["data"]
+ expected_cont = attr.evolve(cont, data=None).to_dict()
- actual_result = swh_storage.content_find({"sha1": data.cont["sha1"]})
+ actual_result = swh_storage.content_find({"sha1": cont.sha1})
assert called == 2
- # but data.cont2 should be filtered out
- del actual_result[0]["ctime"]
+ # but cont2 should be filtered out
assert actual_result == [expected_cont]
@pytest.mark.skip("content_update is not yet implemented for Cassandra")
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 4:28 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224774
Attached To
D3554: test_cassandra: Use data model object
Event Timeline
Log In to Comment