diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -831,13 +831,15 @@ return all_visits def _origin_visit_status_add_one(self, visit_status: OriginVisitStatus) -> None: - """Add an origin visit status without checks. + """Add an origin visit status without checks. If already present, do nothing. """ self.journal_writer.origin_visit_status_add([visit_status]) visit_key = (visit_status.origin, visit_status.visit) self._origin_visit_statuses.setdefault(visit_key, []) - self._origin_visit_statuses[visit_key].append(visit_status) + visit_statuses = self._origin_visit_statuses[visit_key] + if visit_status not in visit_statuses: + visit_statuses.append(visit_status) def origin_visit_status_add( self, visit_statuses: Iterable[OriginVisitStatus], diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -1702,21 +1702,24 @@ origin2 = Origin(url="new-origin") swh_storage.origin_add([origin1, origin2]) - visit1 = OriginVisit( - origin=origin1.url, - date=data.date_visit1, - type=data.type_visit1, - status="ongoing", - snapshot=None, - ) - visit2 = OriginVisit( - origin=origin2.url, - date=data.date_visit2, - type=data.type_visit2, - status="ongoing", - snapshot=None, + ov1, ov2 = swh_storage.origin_visit_add( + [ + OriginVisit( + origin=origin1.url, + date=data.date_visit1, + type=data.type_visit1, + status="ongoing", + snapshot=None, + ), + OriginVisit( + origin=origin2.url, + date=data.date_visit2, + type=data.type_visit2, + status="ongoing", + snapshot=None, + ), + ] ) - ov1, ov2 = swh_storage.origin_visit_add([visit1, visit2]) snapshot_id = data.snapshot["id"] date_visit_now = now() @@ -1738,6 +1741,8 @@ metadata={"intrinsic": "something"}, ) swh_storage.origin_visit_status_add([visit_status1, visit_status2]) + # second call will ignore existing entries (will send to storage though) + swh_storage.origin_visit_status_add([visit_status1, visit_status2]) origin_visit1 = swh_storage.origin_visit_get_latest( origin1.url, require_snapshot=True @@ -1755,22 +1760,27 @@ assert origin_visit2["snapshot"] is None assert origin_visit2["metadata"] == {"intrinsic": "something"} - objects = list(swh_storage.journal_writer.journal.objects) - visit_status1_dict = ov1.to_dict() - visit_status1_dict.pop("type") - visit_status2_dict = ov2.to_dict() - visit_status2_dict.pop("type") - - assert objects == [ - ("origin", origin1), - ("origin", origin2), - ("origin_visit", ov1), - ("origin_visit_status", OriginVisitStatus.from_dict(visit_status1_dict)), - ("origin_visit", ov2), - ("origin_visit_status", OriginVisitStatus.from_dict(visit_status2_dict)), - ("origin_visit_status", visit_status1), - ("origin_visit_status", visit_status2), - ] + actual_objects = list(swh_storage.journal_writer.journal.objects) + + expected_origins = [origin1, origin2] + expected_visits = [ov1, ov2] + expected_visit_statuses = [] + for visit in expected_visits: # out of origin-visit-add calls + visit_status = visit.to_dict() + visit_status.pop("type") + expected_visit_statuses.append(OriginVisitStatus.from_dict(visit_status)) + + # write twice in the journal + expected_visit_statuses += [visit_status1, visit_status2] * 2 + expected_objects = ( + [("origin", o) for o in expected_origins] + + [("origin_visit", v) for v in expected_visits] + + [("origin_visit_status", ovs) for ovs in expected_visit_statuses] + ) + + assert len(actual_objects) == len(expected_objects) + for obj in expected_objects: + assert obj in actual_objects def test_origin_visit_update(self, swh_storage): # given