diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -838,49 +838,6 @@ for visit_status in visit_statuses: self._origin_visit_status_add(visit_status) - def origin_visit_update( - self, - origin: str, - visit_id: int, - status: str, - metadata: Optional[Dict] = None, - snapshot: Optional[bytes] = None, - date: Optional[datetime.datetime] = None, - ): - origin_url = origin # TODO: rename the argument - - # Get the existing data of the visit - visit_ = self.origin_visit_get_by(origin_url, visit_id) - if not visit_: - raise StorageArgumentException("This origin visit does not exist.") - with convert_validation_exceptions(): - visit = OriginVisit.from_dict(visit_) - - updates: Dict[str, Any] = {"status": status} - if metadata and metadata != visit.metadata: - updates["metadata"] = metadata - if snapshot and snapshot != visit.snapshot: - updates["snapshot"] = snapshot - - with convert_validation_exceptions(): - visit = attr.evolve(visit, **updates) - - self.journal_writer.origin_visit_update([visit]) - - last_visit_update = self._origin_visit_get_updated(visit.origin, visit.visit) - assert last_visit_update is not None - - with convert_validation_exceptions(): - visit_status = OriginVisitStatus( - origin=origin_url, - visit=visit_id, - date=date or now(), - status=status, - snapshot=snapshot or last_visit_update["snapshot"], - metadata=metadata or last_visit_update["metadata"], - ) - self._origin_visit_status_add(visit_status) - def _origin_visit_merge( self, visit: Dict[str, Any], visit_status: Dict[str, Any] ) -> Dict[str, Any]: diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -477,31 +477,6 @@ + [jsonize(visit_status.metadata)], ) - def origin_visit_update(self, origin_id, visit_id, updates, cur=None): - """Update origin_visit's status.""" - cur = self._cursor(cur) - update_cols = [] - values = [] - where = ["origin.id = origin_visit.origin", "origin.url=%s", "visit=%s"] - where_values = [origin_id, visit_id] - if "status" in updates: - update_cols.append("status=%s") - values.append(updates.pop("status")) - if "metadata" in updates: - update_cols.append("metadata=%s") - values.append(jsonize(updates.pop("metadata"))) - if "snapshot" in updates: - update_cols.append("snapshot=%s") - values.append(updates.pop("snapshot")) - assert not updates, "Unknown fields: %r" % updates - query = """UPDATE origin_visit - SET {update_cols} - FROM origin - WHERE {where}""".format( - **{"update_cols": ", ".join(update_cols), "where": " AND ".join(where)} - ) - cur.execute(query, (*values, *where_values)) - def origin_visit_upsert(self, origin_visit: OriginVisit, cur=None) -> None: # doing an extra query like this is way simpler than trying to join # the origin id in the query below diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -851,56 +851,6 @@ for visit_status in visit_statuses: self._origin_visit_status_add_one(visit_status) - def origin_visit_update( - self, - origin: str, - visit_id: int, - status: str, - metadata: Optional[Dict] = None, - snapshot: Optional[bytes] = None, - date: Optional[datetime.datetime] = None, - ): - origin_url = self._get_origin_url(origin) - if origin_url is None: - raise StorageArgumentException("Unknown origin.") - - try: - visit = self._origin_visits[origin_url][visit_id - 1] - except IndexError: - raise StorageArgumentException("Unknown visit_id for this origin") from None - - updates: Dict[str, Any] = { - "status": status, - } - if metadata and metadata != visit.metadata: - updates["metadata"] = metadata - if snapshot and snapshot != visit.snapshot: - updates["snapshot"] = snapshot - - if updates: - with convert_validation_exceptions(): - updated_visit = OriginVisit.from_dict({**visit.to_dict(), **updates}) - self.journal_writer.origin_visit_update([updated_visit]) - - self._origin_visits[origin_url][visit_id - 1] = updated_visit - - # Retrieve the previous visit status - assert visit.visit is not None - - last_visit_status = self._origin_visit_get_updated(origin, visit_id) - assert last_visit_status is not None - - with convert_validation_exceptions(): - visit_status = OriginVisitStatus( - origin=origin_url, - visit=visit_id, - date=date or now(), - status=status, - snapshot=snapshot or last_visit_status.snapshot, - metadata=metadata or last_visit_status.metadata, - ) - self._origin_visit_status_add_one(visit_status) - def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None: for visit in visits: if visit.visit is None: diff --git a/swh/storage/interface.py b/swh/storage/interface.py --- a/swh/storage/interface.py +++ b/swh/storage/interface.py @@ -810,33 +810,6 @@ """ ... - @remote_api_endpoint("origin/visit/update") - def origin_visit_update( - self, - origin: str, - visit_id: int, - status: str, - metadata: Optional[Dict] = None, - snapshot: Optional[bytes] = None, - date: Optional[datetime.datetime] = None, - ): - """Update an origin_visit's status. - - Args: - origin (str): visited origin's URL - visit_id: Visit's id - status: Visit's new status - metadata: Data associated to the visit - snapshot (sha1_git): identifier of the snapshot to add to - the visit - date: Update date - - Returns: - None - - """ - ... - @remote_api_endpoint("origin/visit/upsert") def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None: """Add a origin_visits with a specific id and with all its data. diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -878,64 +878,6 @@ for visit_status in visit_statuses: self._origin_visit_status_add(visit_status, db, cur) - @timed - @db_transaction() - def origin_visit_update( - self, - origin: str, - visit_id: int, - status: str, - metadata: Optional[Dict] = None, - snapshot: Optional[bytes] = None, - date: Optional[datetime.datetime] = None, - db=None, - cur=None, - ): - if not isinstance(origin, str): - raise StorageArgumentException( - "origin must be a string, not %r" % (origin,) - ) - origin_url = origin - visit = db.origin_visit_get(origin_url, visit_id, cur=cur) - - if not visit: - raise StorageArgumentException("Invalid visit_id for this origin.") - - visit = dict(zip(db.origin_visit_get_cols, visit)) - - updates: Dict[str, Any] = { - "status": status, - } - if metadata and metadata != visit["metadata"]: - updates["metadata"] = metadata - if snapshot and snapshot != visit["snapshot"]: - updates["snapshot"] = snapshot - - if updates: - with convert_validation_exceptions(): - updated_visit = OriginVisit.from_dict({**visit, **updates}) - self.journal_writer.origin_visit_update([updated_visit]) - - # Write updates to origin visit (backward compatibility) - db.origin_visit_update(origin, visit_id, updates) - - # Add new origin visit status - last_visit_status = self._origin_visit_get_updated( - origin, visit_id, db=db, cur=cur - ) - assert last_visit_status is not None - - with convert_validation_exceptions(): - visit_status = OriginVisitStatus( - origin=origin_url, - visit=visit_id, - date=date or now(), - status=status, - snapshot=snapshot or last_visit_status["snapshot"], - metadata=metadata or last_visit_status["metadata"], - ) - self._origin_visit_status_add(visit_status, db=db, cur=cur) - def _origin_visit_get_updated( self, origin: str, visit_id: int, db, cur ) -> Optional[Dict[str, Any]]: diff --git a/swh/storage/tests/test_kafka_writer.py b/swh/storage/tests/test_kafka_writer.py --- a/swh/storage/tests/test_kafka_writer.py +++ b/swh/storage/tests/test_kafka_writer.py @@ -11,7 +11,7 @@ from swh.journal.pytest_plugin import consume_messages, assert_all_objects_consumed from swh.journal.tests.journal_data import TEST_OBJECTS -from swh.model.model import Person +from swh.model.model import Person, OriginVisitStatus from attr import asdict, has from hypothesis import given from hypothesis.strategies import lists @@ -56,11 +56,15 @@ visit = method([obj])[0] expected_messages += 1 + 1 # 1 visit + 1 visit status + # QUESTION: + # is the following block line needed at all (in my mind, + # origin_visit_add did the work already)? obj_d = obj.to_dict() - for k in ("visit", "origin", "date", "type"): - del obj_d[k] - storage.origin_visit_update(obj.origin, visit.visit, **obj_d) - expected_messages += 1 + 1 # 1 visit update + 1 visit status + del obj_d["type"] + storage.origin_visit_status_add( + [OriginVisitStatus.from_dict({**obj_d, "visit": visit.visit})] + ) + expected_messages += 1 else: assert False, obj_type diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py --- a/swh/storage/tests/test_retry.py +++ b/swh/storage/tests/test_retry.py @@ -679,87 +679,6 @@ assert mock_memory.call_count == 1 -def test_retrying_proxy_swh_storage_origin_visit_update(swh_storage, sample_data): - """Standard origin_visit_update works as before - - """ - sample_origin = sample_data["origin"][0] - origin_url = swh_storage.origin_add_one(sample_origin) - visit = OriginVisit( - origin=origin_url, date=date_visit1, type="hg", status="ongoing", snapshot=None - ) - - origin_visit = swh_storage.origin_visit_add([visit])[0] - - ov = next(swh_storage.origin_visit_get(origin_url)) - assert ov["origin"] == origin_url - assert ov["visit"] == origin_visit.visit - assert ov["status"] == "ongoing" - assert ov["snapshot"] is None - assert ov["metadata"] is None - - swh_storage.origin_visit_update(origin_url, origin_visit.visit, status="full") - - ov = next(swh_storage.origin_visit_get(origin_url)) - assert ov["origin"] == origin_url - assert ov["visit"] == origin_visit.visit - assert ov["status"] == "full" - assert ov["snapshot"] is None - assert ov["metadata"] is None - - -def test_retrying_proxy_swh_storage_origin_visit_update_retry( - monkeypatch_sleep, swh_storage, sample_data, mocker, fake_hash_collision -): - """Multiple retries for hash collision and psycopg2 error but finally ok - - """ - sample_origin = sample_data["origin"][1] - origin_url = sample_origin["url"] - - mock_memory = mocker.patch( - "swh.storage.in_memory.InMemoryStorage.origin_visit_update" - ) - mock_memory.side_effect = [ - # first try goes ko - fake_hash_collision, - # second try goes ko - psycopg2.IntegrityError("origin already inserted"), - # ok then! - {"origin": origin_url, "visit": 1}, - ] - - visit_id = 1 - swh_storage.origin_visit_update(origin_url, visit_id, status="full") - - mock_memory.assert_has_calls( - [ - call(origin_url, visit_id, metadata=None, snapshot=None, status="full"), - call(origin_url, visit_id, metadata=None, snapshot=None, status="full"), - call(origin_url, visit_id, metadata=None, snapshot=None, status="full"), - ] - ) - - -def test_retrying_proxy_swh_storage_origin_visit_update_failure( - swh_storage, sample_data, mocker -): - """Unfiltered errors are raising without retry - - """ - mock_memory = mocker.patch( - "swh.storage.in_memory.InMemoryStorage.origin_visit_update" - ) - mock_memory.side_effect = StorageArgumentException("Refuse to add origin always!") - origin_url = sample_data["origin"][0]["url"] - visit_id = 9 - - with pytest.raises(StorageArgumentException, match="Refuse to add"): - swh_storage.origin_visit_update(origin_url, visit_id, "partial") - - assert mock_memory.call_count == 1 - - def test_retrying_proxy_storage_directory_add(swh_storage, sample_data): """Standard directory_add works as before diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -1423,8 +1423,16 @@ ) ] )[0] - swh_storage.origin_visit_update( - origin_url, visit_id=visit.visit, status="full" + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=visit.visit, + date=now(), + status="full", + snapshot=None, + ) + ] ) swh_storage.refresh_stat_counters() @@ -1459,7 +1467,17 @@ ) ] )[0] - swh_storage.origin_visit_update(origin_url, visit.visit, status="full") + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=visit.visit, + date=now(), + status="full", + snapshot=None, + ) + ] + ) random_origin_visit = swh_storage.origin_visit_get_random(visit_type) assert random_origin_visit is None @@ -1807,194 +1825,6 @@ assert origin_visit2["snapshot"] is None assert origin_visit2["metadata"] == {"intrinsic": "something"} - def test_origin_visit_update(self, swh_storage): - # given - origin_url = swh_storage.origin_add_one(data.origin) - origin_url2 = swh_storage.origin_add_one(data.origin2) - # date_visit < date_visit2 - date_visit = data.date_visit1 - date_visit2 = data.date_visit2 - - # Round to milliseconds before insertion, so equality doesn't fail - # after a round-trip through a DB (eg. Cassandra) - date_visit = date_visit.replace(microsecond=round(date_visit.microsecond, -3)) - date_visit2 = date_visit2.replace( - microsecond=round(date_visit2.microsecond, -3) - ) - - visit1 = OriginVisit( - origin=origin_url, - date=date_visit, - type=data.type_visit1, - status="ongoing", - snapshot=None, - ) - visit2 = OriginVisit( - origin=origin_url, - date=date_visit2, - type=data.type_visit2, - status="ongoing", - snapshot=None, - ) - visit3 = OriginVisit( - origin=origin_url2, - date=date_visit2, - type=data.type_visit3, # noqa - status="ongoing", - snapshot=None, - ) - ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3]) - - # when - visit1_metadata = { - "contents": 42, - "directories": 22, - } - swh_storage.origin_visit_update( - origin_url, ov1.visit, status="full", metadata=visit1_metadata - ) - swh_storage.origin_visit_update(origin_url2, ov3.visit, status="partial") - - # then - actual_origin_visits = list(swh_storage.origin_visit_get(origin_url)) - expected_visits = [ - { - "origin": origin_url, - "date": date_visit, - "visit": ov1.visit, - "type": data.type_visit1, - "status": "full", - "metadata": visit1_metadata, - "snapshot": None, - }, - { - "origin": origin_url, - "date": date_visit2, - "visit": ov2.visit, - "type": data.type_visit2, - "status": "ongoing", - "metadata": None, - "snapshot": None, - }, - ] - for visit in expected_visits: - assert visit in actual_origin_visits - - actual_origin_visits_bis = list( - swh_storage.origin_visit_get(origin_url, limit=1) - ) - assert actual_origin_visits_bis == [ - { - "origin": origin_url, - "date": date_visit, - "visit": ov1.visit, - "type": data.type_visit1, - "status": "full", - "metadata": visit1_metadata, - "snapshot": None, - } - ] - - actual_origin_visits_ter = list( - swh_storage.origin_visit_get(origin_url, last_visit=ov1.visit) - ) - assert actual_origin_visits_ter == [ - { - "origin": origin_url, - "date": date_visit2, - "visit": ov2.visit, - "type": data.type_visit2, - "status": "ongoing", - "metadata": None, - "snapshot": None, - } - ] - - actual_origin_visits2 = list(swh_storage.origin_visit_get(origin_url2)) - assert actual_origin_visits2 == [ - { - "origin": origin_url2, - "date": date_visit2, - "visit": ov3.visit, - "type": data.type_visit3, - "status": "partial", - "metadata": None, - "snapshot": None, - } - ] - - data1 = { - "origin": origin_url, - "date": date_visit, - "visit": ov1.visit, - "type": data.type_visit1, - "status": "ongoing", - "metadata": None, - "snapshot": None, - } - data2 = { - "origin": origin_url, - "date": date_visit2, - "visit": ov2.visit, - "type": data.type_visit2, - "status": "ongoing", - "metadata": None, - "snapshot": None, - } - data3 = { - "origin": origin_url2, - "date": date_visit2, - "visit": ov3.visit, - "type": data.type_visit3, - "status": "ongoing", - "metadata": None, - "snapshot": None, - } - data4 = { - "origin": origin_url, - "date": date_visit, - "visit": ov1.visit, - "type": data.type_visit1, - "metadata": visit1_metadata, - "status": "full", - "snapshot": None, - } - data5 = { - "origin": origin_url2, - "date": date_visit2, - "visit": ov3.visit, - "type": data.type_visit3, - "status": "partial", - "metadata": None, - "snapshot": None, - } - objects = list(swh_storage.journal_writer.journal.objects) - assert ("origin", Origin.from_dict(data.origin)) in objects - assert ("origin", Origin.from_dict(data.origin2)) in objects - assert ("origin_visit", OriginVisit.from_dict(data1)) in objects - assert ("origin_visit", OriginVisit.from_dict(data2)) in objects - assert ("origin_visit", OriginVisit.from_dict(data3)) in objects - assert ("origin_visit", OriginVisit.from_dict(data4)) in objects - assert ("origin_visit", OriginVisit.from_dict(data5)) in objects - - def test_origin_visit_update_validation(self, swh_storage): - origin_url = swh_storage.origin_add_one(data.origin) - visit = OriginVisit( - origin=origin_url, - date=data.date_visit2, - type=data.type_visit2, - status="ongoing", - snapshot=None, - ) - visit = swh_storage.origin_visit_add([visit])[0] - with pytest.raises( - (StorageArgumentException, ValueError), match="status" - ) as cm: - swh_storage.origin_visit_update(origin_url, visit.visit, status="foobar") - - if type(cm.value) == psycopg2.DataError: - assert cm.value.pgcode == psycopg2.errorcodes.INVALID_TEXT_REPRESENTATION - def test_origin_visit_find_by_date(self, swh_storage): # given origin_url = swh_storage.origin_add_one(data.origin) @@ -2033,36 +1863,6 @@ def test_origin_visit_find_by_date__unknown_origin(self, swh_storage): swh_storage.origin_visit_find_by_date("foo", data.date_visit2) - def test_origin_visit_update_missing_snapshot(self, swh_storage): - # given - origin_url = swh_storage.origin_add_one(data.origin) - visit = OriginVisit( - origin=origin_url, - date=data.date_visit1, - type=data.type_visit1, - status="ongoing", - snapshot=None, - ) - origin_visit = swh_storage.origin_visit_add([visit])[0] - - # when - swh_storage.origin_visit_update( - origin_url, - origin_visit.visit, - status="ongoing", - snapshot=data.snapshot["id"], - ) # snapshot does not exist yet - - # then - actual_origin_visit = swh_storage.origin_visit_get_by( - origin_url, origin_visit.visit - ) - assert actual_origin_visit["snapshot"] == data.snapshot["id"] - - # when - swh_storage.snapshot_add([data.snapshot]) - assert actual_origin_visit["snapshot"] == data.snapshot["id"] - def test_origin_visit_get_by(self, swh_storage): origin_url = swh_storage.origin_add_one(data.origin) origin_url2 = swh_storage.origin_add_one(data.origin2) @@ -2076,11 +1876,16 @@ origin_visit1 = swh_storage.origin_visit_add([visit])[0] swh_storage.snapshot_add([data.snapshot]) - swh_storage.origin_visit_update( - origin_url, - origin_visit1.visit, - status="ongoing", - snapshot=data.snapshot["id"], + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=origin_visit1.visit, + date=now(), + status="ongoing", + snapshot=data.snapshot["id"], + ) + ] ) # Add some other {origin, visit} entries @@ -2106,8 +1911,17 @@ "directories": 22, } - swh_storage.origin_visit_update( - origin_url, origin_visit1.visit, status="full", metadata=visit1_metadata + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=origin_visit1.visit, + date=now(), + status="full", + snapshot=data.snapshot["id"], + metadata=visit1_metadata, + ) + ] ) expected_origin_visit = origin_visit1.to_dict() @@ -2423,11 +2237,16 @@ # Add snapshot to visit1; require_snapshot=True makes it return # visit1 and require_snapshot=False still returns visit2 swh_storage.snapshot_add([data.complete_snapshot]) - swh_storage.origin_visit_update( - origin_url, - ov1.visit, - status="ongoing", - snapshot=data.complete_snapshot["id"], + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=ov1.visit, + date=now(), + status="ongoing", + snapshot=data.complete_snapshot["id"], + ) + ] ) assert { **origin_visit1, @@ -2444,7 +2263,18 @@ ) # Mark the first visit as completed and check status filter again - swh_storage.origin_visit_update(origin_url, ov1.visit, status="full") + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=ov1.visit, + date=now(), + status="full", + snapshot=data.complete_snapshot["id"], + ) + ] + ) + assert { **origin_visit1, "snapshot": data.complete_snapshot["id"], @@ -2455,8 +2285,17 @@ # Add snapshot to visit2 and check that the new snapshot is returned swh_storage.snapshot_add([data.empty_snapshot]) - swh_storage.origin_visit_update( - origin_url, ov2.visit, status="ongoing", snapshot=data.empty_snapshot["id"] + + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=ov2.visit, + date=now(), + status="ongoing", + snapshot=data.empty_snapshot["id"], + ) + ] ) assert { **origin_visit2, @@ -2474,11 +2313,17 @@ # Add snapshot to visit3 (same date as visit2) swh_storage.snapshot_add([data.complete_snapshot]) - swh_storage.origin_visit_update( - origin_url, - ov3.visit, - status="ongoing", - snapshot=data.complete_snapshot["id"], + + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=ov3.visit, + date=now(), + status="ongoing", + snapshot=data.complete_snapshot["id"], + ) + ] ) assert { **origin_visit1, @@ -2524,48 +2369,54 @@ def test_snapshot_add_get_empty(self, swh_storage): origin_url = swh_storage.origin_add_one(data.origin) - visit = OriginVisit( - origin=origin_url, - date=data.date_visit1, - type=data.type_visit1, - status="ongoing", - snapshot=None, - ) - origin_visit1 = swh_storage.origin_visit_add([visit])[0] - visit_id = origin_visit1.visit + ov1 = swh_storage.origin_visit_add( + [ + OriginVisit( + origin=origin_url, + date=data.date_visit1, + type=data.type_visit1, + status="ongoing", + snapshot=None, + ) + ] + )[0] actual_result = swh_storage.snapshot_add([data.empty_snapshot]) assert actual_result == {"snapshot:add": 1} date_now = now() - swh_storage.origin_visit_update( - origin_url, - origin_visit1.visit, - status="ongoing", - snapshot=data.empty_snapshot["id"], - date=date_now, + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=ov1.visit, + date=date_now, + status="full", + snapshot=data.empty_snapshot["id"], + ) + ] ) by_id = swh_storage.snapshot_get(data.empty_snapshot["id"]) assert by_id == {**data.empty_snapshot, "next_branch": None} - by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, visit_id) + by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, ov1.visit) assert by_ov == {**data.empty_snapshot, "next_branch": None} data1 = { "origin": origin_url, "date": data.date_visit1, - "visit": origin_visit1.visit, + "visit": ov1.visit, "status": "ongoing", "metadata": None, "snapshot": None, } data2 = { "origin": origin_url, - "date": data.date_visit1, - "visit": origin_visit1.visit, - "status": "ongoing", + "date": date_now, + "visit": ov1.visit, + "status": "full", "metadata": None, "snapshot": data.empty_snapshot["id"], } @@ -2578,14 +2429,7 @@ ), ("origin_visit_status", OriginVisitStatus.from_dict(data1)), ("snapshot", Snapshot.from_dict(data.empty_snapshot)), - ( - "origin_visit", - OriginVisit.from_dict({**data2, "type": data.type_visit1,}), - ), - ( - "origin_visit_status", - OriginVisitStatus.from_dict({**data2, "date": date_now}), - ), + ("origin_visit_status", OriginVisitStatus.from_dict(data2),), ] def test_snapshot_add_get_complete(self, swh_storage): @@ -2602,11 +2446,16 @@ visit_id = origin_visit1.visit actual_result = swh_storage.snapshot_add([data.complete_snapshot]) - swh_storage.origin_visit_update( - origin_url, - origin_visit1.visit, - status="ongoing", - snapshot=data.complete_snapshot["id"], + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=origin_visit1.visit, + date=now(), + status="ongoing", + snapshot=data.complete_snapshot["id"], + ) + ] ) assert actual_result == {"snapshot:add": 1} @@ -2768,11 +2617,16 @@ origin_visit1 = swh_storage.origin_visit_add([visit])[0] swh_storage.snapshot_add([data.complete_snapshot]) - swh_storage.origin_visit_update( - origin_url, - origin_visit1.visit, - status="ongoing", - snapshot=data.complete_snapshot["id"], + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=origin_visit1.visit, + date=now(), + status="ongoing", + snapshot=data.complete_snapshot["id"], + ) + ] ) snp_id = data.complete_snapshot["id"] @@ -2888,11 +2742,16 @@ visit_id = origin_visit1.visit swh_storage.snapshot_add([data.snapshot]) - swh_storage.origin_visit_update( - origin_url, - origin_visit1.visit, - status="ongoing", - snapshot=data.snapshot["id"], + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=origin_visit1.visit, + date=now(), + status="ongoing", + snapshot=data.snapshot["id"], + ) + ] ) by_id = swh_storage.snapshot_get(data.snapshot["id"]) @@ -2904,82 +2763,78 @@ origin_visit_info = swh_storage.origin_visit_get_by(origin_url, visit_id) assert origin_visit_info["snapshot"] == data.snapshot["id"] - def test_snapshot_add_nonexistent_visit(self, swh_storage): - origin_url = swh_storage.origin_add_one(data.origin) - # unknown visit - visit_id = 54164461156 - swh_storage.journal_writer.journal.objects[:] = [] - - swh_storage.snapshot_add([data.snapshot]) - - with pytest.raises(StorageArgumentException): - swh_storage.origin_visit_update( - origin_url, visit_id, status="ongoing", snapshot=data.snapshot["id"] - ) - - assert list(swh_storage.journal_writer.journal.objects) == [ - ("snapshot", Snapshot.from_dict(data.snapshot)) - ] - def test_snapshot_add_twice__by_origin_visit(self, swh_storage): origin_url = swh_storage.origin_add_one(data.origin) - visit = OriginVisit( - origin=origin_url, - date=data.date_visit1, - type=data.type_visit1, - status="ongoing", - snapshot=None, - ) - origin_visit1 = swh_storage.origin_visit_add([visit])[0] - visit1_id = origin_visit1.visit + ov1 = swh_storage.origin_visit_add( + [ + OriginVisit( + origin=origin_url, + date=data.date_visit1, + type=data.type_visit1, + status="ongoing", + snapshot=None, + ) + ] + )[0] swh_storage.snapshot_add([data.snapshot]) date_now2 = now() - swh_storage.origin_visit_update( - origin_url, - origin_visit1.visit, - status="ongoing", - snapshot=data.snapshot["id"], - date=date_now2, + + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=ov1.visit, + date=date_now2, + status="ongoing", + snapshot=data.snapshot["id"], + ) + ] ) - by_ov1 = swh_storage.snapshot_get_by_origin_visit(origin_url, visit1_id) + by_ov1 = swh_storage.snapshot_get_by_origin_visit(origin_url, ov1.visit) assert by_ov1 == {**data.snapshot, "next_branch": None} - visit2 = OriginVisit( - origin=origin_url, - date=data.date_visit2, - type=data.type_visit2, - status="ongoing", - snapshot=None, - ) - origin_visit2 = swh_storage.origin_visit_add([visit2])[0] - visit2_id = origin_visit2.visit + ov2 = swh_storage.origin_visit_add( + [ + OriginVisit( + origin=origin_url, + date=data.date_visit2, + type=data.type_visit2, + status="ongoing", + snapshot=None, + ) + ] + )[0] swh_storage.snapshot_add([data.snapshot]) date_now4 = now() - swh_storage.origin_visit_update( - origin_url, - origin_visit2.visit, - status="ongoing", - snapshot=data.snapshot["id"], - date=date_now4, + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=ov2.visit, + date=date_now4, + status="ongoing", + snapshot=data.snapshot["id"], + ) + ] ) - by_ov2 = swh_storage.snapshot_get_by_origin_visit(origin_url, visit2_id) + by_ov2 = swh_storage.snapshot_get_by_origin_visit(origin_url, ov2.visit) assert by_ov2 == {**data.snapshot, "next_branch": None} data1 = { "origin": origin_url, "date": data.date_visit1, - "visit": origin_visit1.visit, + "visit": ov1.visit, "status": "ongoing", "metadata": None, "snapshot": None, } data2 = { "origin": origin_url, - "date": data.date_visit1, - "visit": origin_visit1.visit, + "date": date_now2, + "visit": ov1.visit, "status": "ongoing", "metadata": None, "snapshot": data.snapshot["id"], @@ -2987,15 +2842,15 @@ data3 = { "origin": origin_url, "date": data.date_visit2, - "visit": origin_visit2.visit, + "visit": ov2.visit, "status": "ongoing", "metadata": None, "snapshot": None, } data4 = { "origin": origin_url, - "date": data.date_visit2, - "visit": origin_visit2.visit, + "date": date_now4, + "visit": ov2.visit, "status": "ongoing", "metadata": None, "snapshot": data.snapshot["id"], @@ -3009,27 +2864,13 @@ ), ("origin_visit_status", OriginVisitStatus.from_dict(data1)), ("snapshot", Snapshot.from_dict(data.snapshot)), - ( - "origin_visit", - OriginVisit.from_dict({**data2, "type": data.type_visit1}), - ), - ( - "origin_visit_status", - OriginVisitStatus.from_dict({**data2, "date": date_now2}), - ), + ("origin_visit_status", OriginVisitStatus.from_dict(data2),), ( "origin_visit", OriginVisit.from_dict({**data3, "type": data.type_visit2}), ), ("origin_visit_status", OriginVisitStatus.from_dict(data3)), - ( - "origin_visit", - OriginVisit.from_dict({**data4, "type": data.type_visit2}), - ), - ( - "origin_visit_status", - OriginVisitStatus.from_dict({**data4, "date": date_now4}), - ), + ("origin_visit_status", OriginVisitStatus.from_dict(data4),), ] def test_snapshot_get_latest(self, swh_storage): @@ -3062,11 +2903,16 @@ # Add snapshot to visit1, latest snapshot = visit 1 snapshot swh_storage.snapshot_add([data.complete_snapshot]) - swh_storage.origin_visit_update( - origin_url, - ov1.visit, - status="ongoing", - snapshot=data.complete_snapshot["id"], + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=ov1.visit, + date=now(), + status="ongoing", + snapshot=data.complete_snapshot["id"], + ) + ] ) assert { **data.complete_snapshot, @@ -3081,7 +2927,17 @@ ) # Mark the first visit as completed and check status filter again - swh_storage.origin_visit_update(origin_url, ov1.visit, status="full") + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=ov1.visit, + date=now(), + status="full", + snapshot=data.complete_snapshot["id"], + ) + ] + ) assert { **data.complete_snapshot, "next_branch": None, @@ -3089,8 +2945,16 @@ # Add snapshot to visit2 and check that the new snapshot is returned swh_storage.snapshot_add([data.empty_snapshot]) - swh_storage.origin_visit_update( - origin_url, ov2.visit, status="ongoing", snapshot=data.empty_snapshot["id"], + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=ov2.visit, + date=now(), + status="ongoing", + snapshot=data.empty_snapshot["id"], + ) + ] ) assert { **data.empty_snapshot, @@ -3106,11 +2970,16 @@ # Add snapshot to visit3 (same date as visit2) and check that # the new snapshot is returned swh_storage.snapshot_add([data.complete_snapshot]) - swh_storage.origin_visit_update( - origin_url, - ov3.visit, - status="ongoing", - snapshot=data.complete_snapshot["id"], + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=ov3.visit, + date=now(), + status="ongoing", + snapshot=data.complete_snapshot["id"], + ) + ] ) assert { **data.complete_snapshot, @@ -3141,11 +3010,16 @@ # Add unknown snapshot to visit1, check that the inconsistency is # detected - swh_storage.origin_visit_update( - origin_url, - ov1.visit, - status="ongoing", - snapshot=data.complete_snapshot["id"], + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=ov1.visit, + date=now(), + status="ongoing", + snapshot=data.complete_snapshot["id"], + ) + ] ) with pytest.raises(Exception): # XXX: should the exception be more specific than this? @@ -3159,7 +3033,18 @@ ) # Mark the first visit as completed and check status filter again - swh_storage.origin_visit_update(origin_url, ov1.visit, status="full") + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=ov1.visit, + date=now(), + status="full", + snapshot=data.complete_snapshot["id"], + ) + ] + ) + with pytest.raises(Exception): # XXX: should the exception be more specific than this? swh_storage.snapshot_get_latest(origin_url, allowed_statuses=["full"]), @@ -3173,8 +3058,16 @@ # Add unknown snapshot to visit2 and check that the inconsistency # is detected - swh_storage.origin_visit_update( - origin_url, ov2.visit, status="ongoing", snapshot=data.snapshot["id"], + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=ov2.visit, + date=now(), + status="ongoing", + snapshot=data.snapshot["id"], + ) + ] ) with pytest.raises(Exception): # XXX: should the exception be more specific than this? @@ -3245,11 +3138,16 @@ origin_visit1 = swh_storage.origin_visit_add([visit])[0] swh_storage.snapshot_add([data.snapshot]) - swh_storage.origin_visit_update( - origin_url, - origin_visit1.visit, - status="ongoing", - snapshot=data.snapshot["id"], + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=origin_visit1.visit, + date=now(), + status="ongoing", + snapshot=data.snapshot["id"], + ) + ] ) swh_storage.directory_add([data.dir]) swh_storage.revision_add([data.revision]) @@ -3988,8 +3886,16 @@ origin=origin_url, date=now(), type="git", status="ongoing", snapshot=None ) visit = swh_storage.origin_visit_add([visit])[0] - swh_storage.origin_visit_update( - origin_url, visit.visit, status="ongoing", snapshot=data.snapshot["id"] + swh_storage.origin_visit_status_add( + [ + OriginVisitStatus( + origin=origin_url, + visit=visit.visit, + date=now(), + status="ongoing", + snapshot=data.snapshot["id"], + ) + ] ) assert swh_storage.origin_count("github", with_visit=False) == 3 diff --git a/swh/storage/writer.py b/swh/storage/writer.py --- a/swh/storage/writer.py +++ b/swh/storage/writer.py @@ -79,9 +79,6 @@ def origin_visit_add(self, visits: Iterable[OriginVisit]) -> None: self.write_additions("origin_visit", visits) - def origin_visit_update(self, visits: Iterable[OriginVisit]) -> None: - self.write_additions("origin_visit", visits) - def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None: self.write_additions("origin_visit", visits)