replayer_storage_and_client = (<swh.storage.in_memory.InMemoryStorage object at 0x7f56f7bb6be0>, <swh.journal.client.JournalClient object at 0x7f56f7b5e198>)
caplog = <_pytest.logging.LogCaptureFixture object at 0x7f594d63c9e8>
def test_storage_replay_with_collision(replayer_storage_and_client, caplog):
"""Another replayer scenario with collisions.
This:
- writes objects to the topic, including colliding contents
- replayer consumes objects from the topic and replay them
- This drops the colliding contents from the replay when detected
"""
src, replayer = replayer_storage_and_client
# Fill Kafka using a source storage
nb_sent = 0
for object_type, objects in TEST_OBJECTS.items():
method = getattr(src, object_type + "_add")
method(objects)
if object_type == "origin_visit":
nb_sent += len(objects) # origin-visit-add adds origin-visit-status as well
nb_sent += len(objects)
# Create collision in input data
# These should not be written in the destination
producer = src.journal_writer.journal.producer
prefix = src.journal_writer.journal._prefix
for content in DUPLICATE_CONTENTS:
topic = f"{prefix}.content"
key = content.sha1
now = datetime.datetime.now(tz=UTC)
content = attr.evolve(content, ctime=now)
producer.produce(
topic=topic,
key=key_to_kafka(key),
value=value_to_kafka(content.to_dict()),
)
nb_sent += 1
producer.flush()
caplog.set_level(logging.ERROR, "swh.journal.replay")
# Fill the destination storage from Kafka
dst = get_storage(cls="memory")
worker_fn = functools.partial(process_replay_objects, storage=dst)
nb_inserted = replayer.process(worker_fn)
> assert nb_sent == nb_inserted
E assert 60 == 55
.tox/py3/lib/python3.7/site-packages/swh/storage/tests/test_replay.py:177: AssertionError
TEST RESULT
TEST RESULT
- Run At: Jul 6 2022, 4:23 PM