Page Menu | Home | Software Heritage

Jenkins > .tox.py3.lib.python3.7.site-packages.swh.storage.tests.test_replay::test_storage_play_with_collision
Failed

TEST RESULT

Run At
Jun 12 2020, 3:31 PM
Details
replayer_storage_and_client = (<swh.storage.in_memory.InMemoryStorage object at 0x7f5fe60b6198>, <swh.journal.client.JournalClient object at 0x7f5fe60b6400>) caplog = <_pytest.logging.LogCaptureFixture object at 0x7f5fe7122080> def test_storage_play_with_collision(replayer_storage_and_client, caplog): """Another replayer scenario with collisions. This: - writes objects to the topic, including colliding contents - replayer consumes objects from the topic and replay them - This drops the colliding contents from the replay when detected """ src, replayer = replayer_storage_and_client # Fill Kafka using a source storage nb_sent = 0 for object_type, objects in TEST_OBJECTS.items(): method = getattr(src, object_type + "_add") method(objects) if object_type == "origin_visit": nb_sent += len(objects) # origin-visit-add adds origin-visit-status as well nb_sent += len(objects) # Create collision in input data # These should not be written in the destination producer = src.journal_writer.journal.producer prefix = src.journal_writer.journal._prefix for content in DUPLICATE_CONTENTS: topic = f"{prefix}.content" key = content["sha1"] producer.produce( topic=topic, key=key_to_kafka(key), value=value_to_kafka(content), ) nb_sent += 1 producer.flush() caplog.set_level(logging.ERROR, "swh.journal.replay") # Fill the destination storage from Kafka dst = get_storage(cls="memory") worker_fn = functools.partial(process_replay_objects, storage=dst) nb_inserted = replayer.process(worker_fn) assert nb_sent == nb_inserted # check the logs for the collision being properly detected nb_collisions = 0 actual_collision: Dict for record in caplog.records: logtext = record.getMessage() if "Collision detected:" in logtext: nb_collisions += 1 actual_collision = record.args["collision"] assert nb_collisions == 1, "1 collision should be detected" algo = "sha1" assert actual_collision["algo"] == algo expected_colliding_hash = hash_to_hex(DUPLICATE_CONTENTS[0][algo]) assert actual_collision["hash"] == 
expected_colliding_hash actual_colliding_hashes = actual_collision["objects"] assert len(actual_colliding_hashes) == len(DUPLICATE_CONTENTS) for content in DUPLICATE_CONTENTS: expected_content_hashes = { k: hash_to_hex(v) for k, v in Content.from_dict(content).hashes().items() } assert expected_content_hashes in actual_colliding_hashes # all objects from the src should exists in the dst storage > _check_replayed(src, dst, exclude=["contents"]) .tox/py3/lib/python3.7/site-packages/swh/storage/tests/test_replay.py:165: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ src = <swh.storage.in_memory.InMemoryStorage object at 0x7f5fe60b6198> dst = <swh.storage.in_memory.InMemoryStorage object at 0x7f61b9419908> exclude = ['contents'] def _check_replayed( src: InMemoryStorage, dst: InMemoryStorage, exclude: Optional[Container] = None ): """Simple utility function to compare the content of 2 in_memory storages """ expected_persons = set(src._persons.values()) got_persons = set(dst._persons.values()) assert got_persons == expected_persons for attr in ( "contents", "skipped_contents", "directories", "revisions", "releases", "snapshots", "origins", "origin_visits", "origin_visit_statuses", ): if exclude and attr in exclude: continue expected_objects = sorted(getattr(src, f"_{attr}").items()) got_objects = sorted(getattr(dst, f"_{attr}").items()) > assert got_objects == expected_objects, f"Mismatch object list for {attr}" E AssertionError: Mismatch object list for origin_visit_statuses E assert [(('https://o...z': 'qux'})])] == [(('https://o...z': 'qux'})])] E At index 0 diff: (('https://overtherainbow.org/fox/den', 1), [OriginVisitStatus(origin='https://overtherainbow.org/fox/den', visit=1, date=datetime.datetime(2014, 11, 27, 17, 20, 39, tzinfo=tzlocal()), status='ongoing', snapshot=None, metadata={'baz': 'qux'}), OriginVisitStatus(origin='https://overtherainbow.org/fox/den', visit=1, date=datetime.datetime(2014, 11, 27, 17, 20, 39, 
tzinfo=datetime.timezone(datetime.timedelta(0), '+00:00')), status='ongoing', snapshot=None, metadata={'baz': 'qux'}), OriginVisitStatus(origin='https://overtherainbow.org/fox/den', visit=1, dat... E E ...Full output truncated (59 lines hidden), use '-vv' to show .tox/py3/lib/python3.7/site-packages/swh/storage/tests/test_replay.py:210: AssertionError