diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -28,7 +28,6 @@ from swh.model.hashutil import DEFAULT_ALGORITHMS from swh.storage.objstorage import ObjStorage from swh.storage.writer import JournalWriter -from swh.storage.validate import convert_validation_exceptions from swh.storage.utils import now from ..exc import StorageArgumentException, HashCollision @@ -879,26 +878,6 @@ visit = self._format_origin_visit_row(row_visit) return self._origin_visit_apply_last_status(visit) - def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None: - for visit in visits: - if visit.visit is None: - raise StorageArgumentException(f"Missing visit id for visit {visit}") - - self.journal_writer.origin_visit_upsert(visits) - for visit in visits: - assert visit.visit is not None - self._cql_runner.origin_visit_upsert(visit) - with convert_validation_exceptions(): - visit_status = OriginVisitStatus( - origin=visit.origin, - visit=visit.visit, - date=now(), - status=visit.status, - snapshot=visit.snapshot, - metadata=visit.metadata, - ) - self._origin_visit_status_add(visit_status) - @staticmethod def _format_origin_visit_row(visit): return { diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -45,7 +45,6 @@ ) from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex from swh.storage.objstorage import ObjStorage -from swh.storage.validate import convert_validation_exceptions from swh.storage.utils import now from .exc import StorageArgumentException, HashCollision @@ -853,48 +852,6 @@ for visit_status in visit_statuses: self._origin_visit_status_add_one(visit_status) - def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None: - for visit in visits: - if visit.visit is None: - raise StorageArgumentException(f"Missing visit id for visit {visit}") - - self.journal_writer.origin_visit_upsert(visits) - - date = now() - - for visit in visits: - assert visit.visit is not None - assert visit.visit > 0 - origin_url = visit.origin - origin = self.origin_get({"url": origin_url}) - - if not origin: # Cannot add a visit without an origin - raise StorageArgumentException("Unknown origin %s", origin_url) - - if origin_url in self._origins: - origin = self._origins[origin_url] - # visit ids are in the range [1, +inf[ - assert visit.visit is not None - visit_key = (origin_url, visit.visit) - - with convert_validation_exceptions(): - visit_status = OriginVisitStatus( - origin=origin_url, - visit=visit.visit, - date=date, - status=visit.status, - snapshot=visit.snapshot, - metadata=visit.metadata, - ) - - while len(self._origin_visits[origin_url]) < visit.visit: - self._origin_visits[origin_url].append(None) - - self._origin_visits[origin_url][visit.visit - 1] = visit - self._origin_visit_status_add_one(visit_status) - - self._objects[visit_key].append(("origin_visit", None)) - def _origin_visit_get_updated(self, origin: str, visit_id: int) -> OriginVisit: """Merge origin visit and latest origin visit status diff --git a/swh/storage/interface.py b/swh/storage/interface.py --- a/swh/storage/interface.py +++ b/swh/storage/interface.py @@ -810,25 +810,6 @@ """ ... - @remote_api_endpoint("origin/visit/upsert") - def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None: - """Add a origin_visits with a specific id and with all its data. - If there is already an origin_visit with the same - `(origin_id, visit_id)`, overwrites it. - - Args: - visits: iterable of dicts with keys: - - - **origin**: dict with keys either `id` or `url` - - **visit**: origin visit id - - **date**: timestamp of such visit - - **status**: Visit's new status - - **metadata**: Data associated to the visit - - **snapshot**: identifier of the snapshot to add to - the visit - """ - ... - @remote_api_endpoint("origin/visit/get") def origin_visit_get( self, origin: str, last_visit: Optional[int] = None, limit: Optional[int] = None diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -923,41 +923,6 @@ } ).to_dict() - @timed - @db_transaction() - def origin_visit_upsert( - self, visits: Iterable[OriginVisit], db=None, cur=None - ) -> None: - visit_statuses = [] - nb_visits = 0 - for visit in visits: - nb_visits += 1 - if visit.visit is None: - raise StorageArgumentException(f"Missing visit id for visit {visit}") - with convert_validation_exceptions(): - visit_statuses.append( - OriginVisitStatus( - origin=visit.origin, - visit=visit.visit, - date=now(), - status=visit.status, - snapshot=visit.snapshot, - metadata=visit.metadata, - ) - ) - - assert len(visit_statuses) == nb_visits - - # write in journal first - self.journal_writer.origin_visit_upsert(visits) - self.journal_writer.origin_visit_status_add(visit_statuses) - - # then sync to db - for i, visit in enumerate(visits): - assert visit.visit is not None - db.origin_visit_upsert(visit, cur=cur) - db.origin_visit_status_add(visit_statuses[i], cur=cur) - @timed @db_transaction_generator(statement_timeout=500) def origin_visit_get( diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -23,12 +23,11 @@ from hypothesis import given, strategies, settings, HealthCheck -from typing import ClassVar, Iterable, Optional, Tuple +from typing import ClassVar, Optional from swh.model import from_disk, identifiers from swh.model.hashutil import hash_to_bytes from swh.model.model import ( - BaseModel, Content, Directory, Origin, @@ -1983,251 +1982,6 @@ def test_origin_visit_get_by__unknown_origin(self, swh_storage): assert swh_storage.origin_visit_get_by("foo", 10) is None - def assert_upsert_written_objects( - self, - actual_written_objects: Iterable[Tuple[str, BaseModel]], - expected_written_objects: Iterable[Tuple[str, BaseModel]], - ): - """Helper utility to ensure written upsert objects are as expected. - - OriginVisitStatus from the origin_visit_upsert call point of view need special - so we can compare actual and expected values. - - """ - written_objects_by = defaultdict(list) - for obj_type, obj in actual_written_objects: - written_objects_by[obj_type].append(obj) - - expected_objects_by = defaultdict(list) - for obj_type, obj in expected_written_objects: - expected_objects_by[obj_type].append(obj) - - # straightforward comparison for those (order does not matter) - for obj_type in ["origin", "origin_visit"]: - assert set(written_objects_by[obj_type]) == set( - expected_objects_by[obj_type] - ) - - # origin-visit-status is specific though, origin_visit_upsert writes now() date. - # We cannot mock as we use multiple implementations (fully qualified name is - # thus different), we cannot open a parameter field date (not part of the - # signature), so we overwrite both actual and expected visit_status to use the - # same date so the comparison works... - - obj_type = "origin_visit_status" - expected_visit_statuses = expected_objects_by[obj_type] - for i, actual_visit_status in enumerate(written_objects_by[obj_type]): - expected_visit_status = expected_visit_statuses[i] - - test_date = now() - actual_new = OriginVisitStatus.from_dict( - {**actual_visit_status.to_dict(), "date": test_date} - ) - - expected_new = OriginVisitStatus.from_dict( - {**expected_visit_status.to_dict(), "date": test_date} - ) - - assert actual_new == expected_new - - def test_origin_visit_upsert_new(self, swh_storage, mocker): - # given - origin_url = swh_storage.origin_add_one(data.origin2) - - # when - swh_storage.origin_visit_upsert( - [ - OriginVisit.from_dict( - { - "origin": origin_url, - "date": data.date_visit2, - "visit": 123, - "type": data.type_visit2, - "status": "full", - "metadata": None, - "snapshot": None, - } - ), - OriginVisit.from_dict( - { - "origin": origin_url, - "date": "2018-01-01 23:00:00+00", - "visit": 1234, - "type": data.type_visit2, - "status": "full", - "metadata": None, - "snapshot": None, - } - ), - ] - ) - - # then - actual_origin_visits = list(swh_storage.origin_visit_get(origin_url)) - assert actual_origin_visits == [ - { - "origin": origin_url, - "date": data.date_visit2, - "visit": 123, - "type": data.type_visit2, - "status": "full", - "metadata": None, - "snapshot": None, - }, - { - "origin": origin_url, - "date": data.date_visit3, - "visit": 1234, - "type": data.type_visit2, - "status": "full", - "metadata": None, - "snapshot": None, - }, - ] - - data1 = { - "origin": origin_url, - "date": data.date_visit2, - "visit": 123, - "status": "full", - "metadata": None, - "snapshot": None, - } - data2 = { - "origin": origin_url, - "date": data.date_visit3, - "visit": 1234, - "status": "full", - "metadata": None, - "snapshot": None, - } - actual_written_objects = list(swh_storage.journal_writer.journal.objects) - - # Ensure we have those written to journal - self.assert_upsert_written_objects( - actual_written_objects, - [ - ("origin", Origin.from_dict(data.origin2)), - ( - "origin_visit", - OriginVisit.from_dict({**data1, "type": data.type_visit2}), - ), - ( - "origin_visit", - OriginVisit.from_dict({**data2, "type": data.type_visit2,}), - ), - ("origin_visit_status", OriginVisitStatus.from_dict(data1)), - ("origin_visit_status", OriginVisitStatus.from_dict(data2)), - ], - ) - - def test_origin_visit_upsert_existing(self, swh_storage): - # given - origin_url = swh_storage.origin_add_one(data.origin2) - - # when - visit1 = OriginVisit( - origin=origin_url, - date=data.date_visit2, - type=data.type_visit1, - status="ongoing", - snapshot=None, - ) - origin_visit1 = swh_storage.origin_visit_add([visit1])[0] - - swh_storage.origin_visit_upsert( - [ - OriginVisit.from_dict( - { - "origin": origin_url, - "date": data.date_visit2, - "visit": origin_visit1.visit, - "type": data.type_visit1, - "status": "full", - "metadata": None, - "snapshot": None, - } - ) - ] - ) - - # then - assert origin_visit1.origin == origin_url - assert origin_visit1.visit is not None - - actual_origin_visits = list(swh_storage.origin_visit_get(origin_url)) - assert actual_origin_visits == [ - { - "origin": origin_url, - "date": data.date_visit2, - "visit": origin_visit1.visit, - "type": data.type_visit1, - "status": "full", - "metadata": None, - "snapshot": None, - } - ] - - data1 = { - "origin": origin_url, - "date": data.date_visit2, - "visit": origin_visit1.visit, - "status": "ongoing", - "metadata": None, - "snapshot": None, - } - data2 = { - "origin": origin_url, - "date": data.date_visit2, - "visit": origin_visit1.visit, - "status": "full", - "metadata": None, - "snapshot": None, - } - actual_written_objects = list(swh_storage.journal_writer.journal.objects) - self.assert_upsert_written_objects( - actual_written_objects, - [ - ("origin", Origin.from_dict(data.origin2)), - ( - "origin_visit", - OriginVisit.from_dict({**data1, "type": data.type_visit1,}), - ), - ("origin_visit_status", OriginVisitStatus.from_dict(data1)), - ( - "origin_visit", - OriginVisit.from_dict({**data2, "type": data.type_visit1}), - ), - ("origin_visit_status", OriginVisitStatus.from_dict(data2)), - ], - ) - - def test_origin_visit_upsert_missing_visit_id(self, swh_storage): - # given - origin_url = swh_storage.origin_add_one(data.origin2) - - # then - with pytest.raises(StorageArgumentException, match="Missing visit id"): - swh_storage.origin_visit_upsert( - [ - OriginVisit.from_dict( - { - "origin": origin_url, - "date": data.date_visit2, - "visit": None, # <- make the test raise - "type": data.type_visit1, - "status": "full", - "metadata": None, - "snapshot": None, - } - ) - ] - ) - - assert list(swh_storage.journal_writer.journal.objects) == [ - ("origin", Origin.from_dict(data.origin2)) - ] - def test_origin_visit_get_by_no_result(self, swh_storage): swh_storage.origin_add([data.origin]) actual_origin_visit = swh_storage.origin_visit_get_by(data.origin["url"], 999)