swh/journal/tests/test_write_replay.py
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import functools
from unittest.mock import patch

import attr
from hypothesis import given, settings, HealthCheck
from hypothesis.strategies import lists

from swh.model.hypothesis_strategies import object_dicts
from swh.storage import get_storage, HashCollision

from swh.journal.replay import process_replay_objects
from swh.journal.replay import process_replay_objects_content

from .utils import MockedJournalClient, MockedKafkaWriter

storage_config = {
    'cls': 'pipeline',
    'steps': [
        {'cls': 'validate'},
        {'cls': 'memory', 'journal_writer': {'cls': 'memory'}},
    ]
}
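# Note: the 'pipeline' storage class is assumed here to stack each step as a
# proxy over the next one: a 'validate' proxy wrapping an in-memory backend
# whose writes are mirrored to an in-memory journal writer, so that
# get_storage(**storage_config) yields a self-contained test storage.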

def empty_person_name_email(rev_or_rel):
    """Empties the 'name' and 'email' fields of the author/committer of a
    revision or release, leaving only the fullname."""
    if getattr(rev_or_rel, 'author', None):
        rev_or_rel = attr.evolve(
            rev_or_rel,
            author=attr.evolve(
                rev_or_rel.author,
# ... (16 lines elided in the changeset view) ...

@given(lists(object_dicts(), min_size=1))
@settings(suppress_health_check=[HealthCheck.too_slow])
def test_write_replay_same_order_batches(objects):
    queue = []
    replayer = MockedJournalClient(queue)

    with patch('swh.journal.writer.inmemory.InMemoryJournalWriter',
               return_value=MockedKafkaWriter(queue)):
        storage1 = get_storage(**storage_config)
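    # The patch above rests on the assumption that get_storage(), seeing
    # 'journal_writer': {'cls': 'memory'}, instantiates
    # swh.journal.writer.inmemory.InMemoryJournalWriter; swapping in
    # MockedKafkaWriter(queue) makes every write to storage1 also land on
    # `queue`, where the replayer below picks it up.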

    for (obj_type, obj) in objects:
        obj = obj.copy()
        if obj_type == 'origin_visit':
            storage1.origin_add_one({'url': obj['origin']})
            storage1.origin_visit_upsert([obj])
        else:
            if obj_type == 'content' and obj.get('status') == 'absent':
                obj_type = 'skipped_content'
            method = getattr(storage1, obj_type + '_add')
            try:
                method([obj])
            except HashCollision:
                pass

    queue_size = len(queue)
    assert replayer.max_messages == 0
    replayer.max_messages = queue_size

    storage2 = get_storage(**storage_config)
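    # process_replay_objects writes each replayed batch of (object_type,
    # object) messages back into storage2; once the queue is drained,
    # storage2 should hold the same objects as storage1.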
    worker_fn = functools.partial(process_replay_objects, storage=storage2)
    nb_messages = 0
    while nb_messages < queue_size:
        nb_messages += replayer.process(worker_fn)
    assert replayer.consumer.committed

    for attr_name in ('_contents', '_directories',
# ... (21 lines elided in the changeset view) ...

@given(lists(object_dicts(), min_size=1))
@settings(suppress_health_check=[HealthCheck.too_slow])
def test_write_replay_content(objects):
    queue = []
    replayer = MockedJournalClient(queue)

    with patch('swh.journal.writer.inmemory.InMemoryJournalWriter',
               return_value=MockedKafkaWriter(queue)):
        storage1 = get_storage(**storage_config)

    contents = []
    for (obj_type, obj) in objects:
        obj = obj.copy()
        if obj_type == 'content':
            # avoid hash collision
            if not storage1.content_find(obj):
                if obj.get('status') != 'absent':
                    storage1.content_add([obj])
                    contents.append(obj)

    queue_size = len(queue)
    assert replayer.max_messages == 0
    replayer.max_messages = queue_size

    storage2 = get_storage(**storage_config)
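    # Unlike process_replay_objects, the content replayer copies raw content
    # bytes between object storages: src is storage1's objstorage, dst is
    # storage2's (both storages are assumed to expose their backing
    # objstorage via this attribute, as the test relies on).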
    worker_fn = functools.partial(process_replay_objects_content,
                                  src=storage1.objstorage,
                                  dst=storage2.objstorage)
    nb_messages = 0
    while nb_messages < queue_size:
        nb_messages += replayer.process(worker_fn)

    # only content with status 'visible' is copied to storage2
    expected_objstorage_state = {
        c['sha1']: c['data'] for c in contents if c['status'] == 'visible'
    }

    assert expected_objstorage_state == storage2.objstorage.state