Page MenuHomeSoftware Heritage

D5720.id.diff
No OneTemporary

D5720.id.diff

diff --git a/swh/storage/tests/test_cassandra.py b/swh/storage/tests/test_cassandra.py
--- a/swh/storage/tests/test_cassandra.py
+++ b/swh/storage/tests/test_cassandra.py
@@ -16,7 +16,7 @@
import pytest
from swh.core.api.classes import stream_results
-from swh.model.model import Directory, DirectoryEntry, Snapshot
+from swh.model.model import Directory, DirectoryEntry, Snapshot, SnapshotBranch
from swh.storage import get_storage
from swh.storage.cassandra import create_keyspace
from swh.storage.cassandra.model import ContentRow, ExtIDRow
@@ -415,6 +415,83 @@
)
assert extids == [extid]
+ def test_directory_add_atomic(self, swh_storage, sample_data, mocker):
+ """Checks that a crash occurring after some directory entries were written
+ does not cause the directory to be (partially) visible.
+ ie. checks directories are added somewhat atomically."""
+ # Disable the journal writer, it would detect the CrashyEntry exception too
+ # early for this test to be relevant
+ swh_storage.journal_writer.journal = None
+
+ class MyException(Exception):
+ pass
+
+ class CrashyEntry(DirectoryEntry):
+ def __init__(self):
+ pass
+
+ def to_dict(self):
+ raise MyException()
+
+ directory = sample_data.directory3
+ entries = directory.entries
+ directory = attr.evolve(directory, entries=entries + (CrashyEntry(),))
+
+ with pytest.raises(MyException):
+ swh_storage.directory_add([directory])
+
+ # This should have written some of the entries to the database:
+ entry_rows = swh_storage._cql_runner.directory_entry_get([directory.id])
+ assert {row.name for row in entry_rows} == {entry.name for entry in entries}
+
+ # BUT, because not all the entries were written, the directory should
+ # be considered not written.
+ assert swh_storage.directory_missing([directory.id]) == [directory.id]
+ assert list(swh_storage.directory_ls(directory.id)) == []
+
+ def test_snapshot_add_atomic(self, swh_storage, sample_data, mocker):
+ """Checks that a crash occurring after some snapshot branches were written
+ does not cause the snapshot to be (partially) visible.
+ ie. checks snapshots are added somewhat atomically."""
+ # Disable the journal writer, it would detect the CrashyBranch exception too
+ # early for this test to be relevant
+ swh_storage.journal_writer.journal = None
+
+ class MyException(Exception):
+ pass
+
+ class CrashyBranch(SnapshotBranch):
+ def __getattribute__(self, name):
+ if name == "target" and should_raise:
+ raise MyException()
+ else:
+ return super().__getattribute__(name)
+
+ snapshot = sample_data.complete_snapshot
+ branches = snapshot.branches
+
+ should_raise = False # just so that we can construct the object
+ crashy_branch = CrashyBranch.from_dict(branches[b"directory"].to_dict())
+ should_raise = True
+
+ snapshot = attr.evolve(
+ snapshot, branches={**branches, b"crashy": crashy_branch,},
+ )
+
+ with pytest.raises(MyException):
+ swh_storage.snapshot_add([snapshot])
+
+ # This should have written some of the branches to the database:
+ branch_rows = swh_storage._cql_runner.snapshot_branch_get(snapshot.id, b"", 10)
+ assert {row.name for row in branch_rows} == set(branches)
+
+ # BUT, because not all the branches were written, the snapshot should
+ # be considered not written.
+ assert swh_storage.snapshot_missing([snapshot.id]) == [snapshot.id]
+ assert swh_storage.snapshot_get(snapshot.id) is None
+ assert swh_storage.snapshot_count_branches(snapshot.id) is None
+ assert swh_storage.snapshot_get_branches(snapshot.id) is None
+
@pytest.mark.skip(
'The "person" table of the pgsql is a legacy thing, and not '
"supported by the cassandra backend."

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 7:14 AM (8 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225504

Event Timeline