diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -856,7 +856,7 @@
 
     def origin_visit_find_by_date(
         self, origin: str, visit_date: datetime.datetime
-    ) -> Optional[Dict[str, Any]]:
+    ) -> Optional[OriginVisit]:
         # Iterator over all the visits of the origin
         # This should be ok for now, as there aren't too many visits
         # per origin.
@@ -867,9 +867,7 @@
             return (abs(dt), -visit.visit)
 
         if rows:
-            row = min(rows, key=key)
-            visit = self._format_origin_visit_row(row)
-            return self._origin_visit_apply_last_status(visit)
+            return converters.row_to_visit(min(rows, key=key))
         return None
 
     def origin_visit_get_by(self, origin: str, visit: int) -> Optional[Dict[str, Any]]:
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -890,14 +890,12 @@
 
     def origin_visit_find_by_date(
         self, origin: str, visit_date: datetime.datetime
-    ) -> Optional[Dict[str, Any]]:
+    ) -> Optional[OriginVisit]:
         origin_url = self._get_origin_url(origin)
         if origin_url in self._origin_visits:
             visits = self._origin_visits[origin_url]
             visit = min(visits, key=lambda v: (abs(v.date - visit_date), -v.visit))
-            visit_update = self._origin_visit_get_updated(origin, visit.visit)
-            assert visit_update is not None
-            return visit_update
+            return visit
         return None
 
     def origin_visit_get_by(self, origin: str, visit: int) -> Optional[Dict[str, Any]]:
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -816,7 +816,7 @@
     @remote_api_endpoint("origin/visit/find_by_date")
     def origin_visit_find_by_date(
         self, origin: str, visit_date: datetime.datetime
-    ) -> Optional[Dict[str, Any]]:
+    ) -> Optional[OriginVisit]:
         """Retrieves the origin visit whose date is closest to the provided
         timestamp.
         In case of a tie, the visit with largest id is selected.
@@ -826,7 +826,7 @@
             visit_date: expected visit date
 
         Returns:
-            A visit
+            A visit if found, None otherwise
 
         """
         ...
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -898,8 +898,16 @@
     @db_transaction(statement_timeout=500)
     def origin_visit_find_by_date(
         self, origin: str, visit_date: datetime.datetime, db=None, cur=None
-    ) -> Optional[Dict[str, Any]]:
-        return db.origin_visit_find_by_date(origin, visit_date, cur=cur)
+    ) -> Optional[OriginVisit]:
+        row_d = db.origin_visit_find_by_date(origin, visit_date, cur=cur)
+        if not row_d:
+            return None
+        return OriginVisit(
+            origin=row_d["origin"],
+            visit=row_d["visit"],
+            date=row_d["date"],
+            type=row_d["type"],
+        )
 
     @timed
     @db_transaction(statement_timeout=500)
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -1800,19 +1800,22 @@
         swh_storage.origin_visit_status_add([ovs1, ovs2, ovs3])
 
         # Simple case
-        visit = swh_storage.origin_visit_find_by_date(
+        actual_visit = swh_storage.origin_visit_find_by_date(
             origin.url, sample_data.date_visit3
         )
-        assert visit["visit"] == ov2.visit
+        assert actual_visit == ov2
 
         # There are two visits at the same date, the latest must be returned
-        visit = swh_storage.origin_visit_find_by_date(
+        actual_visit = swh_storage.origin_visit_find_by_date(
             origin.url, sample_data.date_visit2
         )
-        assert visit["visit"] == ov3.visit
+        assert actual_visit == ov3
 
     def test_origin_visit_find_by_date__unknown_origin(self, swh_storage, sample_data):
-        swh_storage.origin_visit_find_by_date("foo", sample_data.date_visit2)
+        actual_visit = swh_storage.origin_visit_find_by_date(
+            "foo", sample_data.date_visit2
+        )
+        assert actual_visit is None
 
     def test_origin_visit_get_by(self, swh_storage, sample_data):
         snapshot = sample_data.snapshot