Page MenuHomeSoftware Heritage

D3641.id12823.diff
No OneTemporary

D3641.id12823.diff

diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -574,8 +574,14 @@
row = cur.fetchone()
return self._make_origin_visit_status(row)
- def origin_visit_get_all(
- self, origin_id, last_visit=None, order="asc", limit=None, cur=None
+ def origin_visit_status_get_range(
+ self,
+ origin: str,
+ visit: int,
+ date_from: Optional[datetime.datetime],
+ order: ListOrder,
+ limit: int,
+ cur=None,
):
"""Retrieve all visits for origin with id origin_id.
@@ -587,34 +593,29 @@
"""
cur = self._cursor(cur)
- assert order.lower() in ["asc", "desc"]
query_parts = [
- "SELECT DISTINCT ON (ov.visit) %s "
- % ", ".join(self.origin_visit_select_cols),
- "FROM origin_visit ov",
- "INNER JOIN origin o ON o.id = ov.origin",
- "INNER JOIN origin_visit_status ovs",
- "ON ov.origin = ovs.origin AND ov.visit = ovs.visit",
+ f"SELECT {', '.join(self.origin_visit_status_select_cols)} "
+ "FROM origin_visit_status ovs ",
+ "INNER JOIN origin o ON o.id = ovs.origin ",
]
- query_parts.append("WHERE o.url = %s")
- query_params: List[Any] = [origin_id]
+ query_parts.append("WHERE o.url = %s AND ovs.visit = %s ")
+ query_params: List[Any] = [origin, visit]
- if last_visit is not None:
- op_comparison = ">" if order == "asc" else "<"
- query_parts.append(f"and ov.visit {op_comparison} %s")
- query_params.append(last_visit)
+ if date_from is not None:
+ op_comparison = ">" if order == ListOrder.ASC else "<"
+ query_parts.append(f"and ovs.date {op_comparison} %s ")
+ query_params.append(date_from)
- if order == "asc":
- query_parts.append("ORDER BY ov.visit ASC, ovs.date DESC")
- elif order == "desc":
- query_parts.append("ORDER BY ov.visit DESC, ovs.date DESC")
+ if order == ListOrder.ASC:
+ query_parts.append("ORDER BY ovs.date ASC ")
+ elif order == ListOrder.DESC:
+ query_parts.append("ORDER BY ovs.date DESC ")
else:
assert False
- if limit is not None:
- query_parts.append("LIMIT %s")
- query_params.append(limit)
+ query_parts.append("LIMIT %s")
+ query_params.append(limit)
query = "\n".join(query_parts)
cur.execute(query, tuple(query_params))
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -891,6 +891,34 @@
"""
...
+ @remote_api_endpoint("origin/visit_status/get")
+ def origin_visit_status_get(
+ self,
+ origin: str,
+ visit: int,
+ page_token: Optional[str] = None,
+ order: ListOrder = ListOrder.ASC,
+ limit: int = 10,
+ ) -> PagedResult[OriginVisitStatus]:
+ """Retrieve a page of OriginVisitStatus objects for one visit of an origin.
+
+ Args:
+ origin: The visited origin
+ visit: The visit identifier
+ page_token: opaque string returned by a previous call, used to get the
+ next page of results (None to get the first page)
+ order: Order on the visit status dates (defaults to asc)
+ limit: Maximum number of visit statuses to return in one page
+
+ Raises:
+ StorageArgumentException if the order is not a ListOrder value or the
+ page_token is mistyped.
+
+ Returns: Page of OriginVisitStatus data model objects. If next_page_token is
+ None, there is no more data to retrieve.
+
+ """
+ ...
+
@remote_api_endpoint("origin/visit_status/get_latest")
def origin_visit_status_get_latest(
self,
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -976,6 +976,67 @@
return visit
return None
+ @timed
+ @db_transaction(statement_timeout=500)
+ def origin_visit_status_get(
+ self,
+ origin: str,
+ visit: int,
+ page_token: Optional[str] = None,
+ order: ListOrder = ListOrder.ASC,
+ limit: int = 10,
+ db=None,
+ cur=None,
+ ) -> PagedResult[OriginVisit]:
+ if not isinstance(order, ListOrder):
+ raise StorageArgumentException("order must be a ListOrder value")
+ if page_token and not isinstance(page_token, str):
+ raise StorageArgumentException("page_token must be a string.")
+
+ next_page_token = None
+ if page_token is None:
+ date_from = None
+ else:
+ date_from = datetime.datetime.fromisoformat(page_token)
+ visit_statuses: List[OriginVisitStatus] = []
+ extra_limit = limit + 1
+ for row in db.origin_visit_status_get_range(
+ origin,
+ visit=visit,
+ date_from=date_from,
+ order=order,
+ limit=extra_limit,
+ cur=cur,
+ ):
+ row_d = dict(zip(db.origin_visit_status_cols, row))
+ visit_statuses.append(
+ OriginVisitStatus(
+ origin=row_d["origin"],
+ visit=row_d["visit"],
+ date=row_d["date"],
+ status=row_d["status"],
+ snapshot=row_d["snapshot"],
+ metadata=row_d["metadata"],
+ )
+ )
+
+ assert len(visit_statuses) <= extra_limit
+
+ if len(visit_statuses) == extra_limit:
+ last_visit_status = visit_statuses[limit]
+ visit_statuses = visit_statuses[:limit]
+ assert last_visit_status is not None
+ if order == ListOrder.ASC:
+ next_page_token = str(
+ last_visit_status.date - datetime.timedelta(seconds=1)
+ )
+ else:
+ next_page_token = str(
+ last_visit_status.date + datetime.timedelta(seconds=1)
+ )
+
+ return PagedResult(results=visit_statuses, next_page_token=next_page_token)
+
@timed
@db_transaction()
def origin_visit_status_get_random(
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -1295,6 +1295,195 @@
)
assert actual_page == PagedResult(results=[ov1])
+ def test_origin_visit_status_get__unknown_cases(self, swh_storage, sample_data):
+ # Each lookup below is expected to yield an empty page without a next token.
+ origin = sample_data.origin
+ # url that does not match any origin used by this test
+ actual_page = swh_storage.origin_visit_status_get("foobar", 1)
+ assert actual_page.next_page_token is None
+ assert actual_page.results == []
+ assert actual_page == PagedResult()
+
+ # sample origin url, queried before this test adds the origin to the storage
+ actual_page = swh_storage.origin_visit_status_get(origin.url, 1)
+ assert actual_page.next_page_token is None
+ assert actual_page.results == []
+ assert actual_page == PagedResult()
+
+ origin = sample_data.origin
+ swh_storage.origin_add([origin])
+ ov1 = swh_storage.origin_visit_add(
+ [
+ OriginVisit(
+ origin=origin.url,
+ date=sample_data.date_visit1,
+ type=sample_data.type_visit1,
+ ),
+ ]
+ )[0]
+ # origin and visit now exist, but the requested visit id (ov1.visit + 10)
+ # was never created by this test
+ actual_page = swh_storage.origin_visit_status_get(origin.url, ov1.visit + 10)
+ assert actual_page.next_page_token is None
+ assert actual_page.results == []
+ assert actual_page == PagedResult()
+
+ def test_origin_visit_status_get__validation_failure(
+ self, swh_storage, sample_data
+ ):
+ # Invalid page_token or order arguments must raise StorageArgumentException.
+ origin = sample_data.origin
+ swh_storage.origin_add([origin])
+ ov1 = swh_storage.origin_visit_add(
+ [
+ OriginVisit(
+ origin=origin.url,
+ date=sample_data.date_visit1,
+ type=sample_data.type_visit1,
+ ),
+ ]
+ )[0]
+
+ with pytest.raises(
+ StorageArgumentException, match="page_token must be a string"
+ ):
+ # page_token not str
+ swh_storage.origin_visit_status_get(origin.url, ov1.visit, page_token=10)
+
+ with pytest.raises(
+ StorageArgumentException, match="order must be a ListOrder value"
+ ):
+ # wrong order
+ swh_storage.origin_visit_status_get(origin.url, ov1.visit, order="foobar")
+
+ def test_origin_visit_status_get_all(self, swh_storage, sample_data):
+ # Walk the three statuses of a single visit in both orders, with and
+ # without page tokens. "no limit" in the comments below means the limit
+ # argument is not passed, so the endpoint's default limit applies.
+ origin = sample_data.origin
+ swh_storage.origin_add([origin])
+ date_visit3 = round_to_milliseconds(now())
+ date_visit1 = date_visit3 - datetime.timedelta(hours=2)
+ date_visit2 = date_visit3 - datetime.timedelta(hours=1)
+ assert date_visit1 < date_visit2
+ assert date_visit2 < date_visit3
+
+ ov1 = swh_storage.origin_visit_add(
+ [
+ OriginVisit(
+ origin=origin.url, date=date_visit1, type=sample_data.type_visit1,
+ ),
+ ]
+ )[0]
+
+ ovs1 = OriginVisitStatus(
+ origin=origin.url,
+ visit=ov1.visit,
+ date=date_visit1,
+ status="created",
+ snapshot=None,
+ )
+
+ ovs2 = OriginVisitStatus(
+ origin=origin.url,
+ visit=ov1.visit,
+ date=date_visit2,
+ status="partial",
+ snapshot=None,
+ )
+
+ ovs3 = OriginVisitStatus(
+ origin=origin.url,
+ visit=ov1.visit,
+ date=date_visit3,
+ status="full",
+ snapshot=sample_data.snapshot.id,
+ metadata={},
+ )
+
+ # ovs1 is deliberately not added here, yet the assertions below expect it
+ # in the results: presumably origin_visit_add already recorded a "created"
+ # status at the visit date -- TODO confirm against origin_visit_add.
+ swh_storage.origin_visit_status_add([ovs2, ovs3])
+
+ # order asc, no pagination, no limit
+ actual_page = swh_storage.origin_visit_status_get(origin.url, ov1.visit)
+ assert actual_page.next_page_token is None
+ assert actual_page == PagedResult(results=[ovs1, ovs2, ovs3])
+
+ # order asc, no pagination, limit
+ actual_page = swh_storage.origin_visit_status_get(
+ origin.url, ov1.visit, limit=2
+ )
+ next_page_token = actual_page.next_page_token
+ assert next_page_token is not None
+ assert actual_page.results == [ovs1, ovs2]
+
+ # order asc, pagination, no limit
+ actual_page = swh_storage.origin_visit_status_get(
+ origin.url, ov1.visit, page_token=next_page_token
+ )
+ assert actual_page.next_page_token is None
+ assert actual_page.results == [ovs3]
+ assert actual_page == PagedResult(results=[ovs3])
+
+ # hand-built token: the page is expected to start strictly after ovs1's date
+ next_page_token = str(ovs1.date)
+ actual_page = swh_storage.origin_visit_status_get(
+ origin.url, ov1.visit, page_token=next_page_token
+ )
+ assert actual_page.next_page_token is None
+ assert actual_page == PagedResult(results=[ovs2, ovs3])
+
+ # order asc, pagination, limit
+ actual_page = swh_storage.origin_visit_status_get(
+ origin.url, ov1.visit, page_token=next_page_token, limit=2
+ )
+ assert actual_page.next_page_token is None
+ assert actual_page.results == [ovs2, ovs3]
+ assert actual_page == PagedResult(results=[ovs2, ovs3])
+
+ next_page_token = str(ovs2.date)
+ actual_page = swh_storage.origin_visit_status_get(
+ origin.url, ov1.visit, page_token=next_page_token, limit=1
+ )
+ assert actual_page.next_page_token is None
+ assert actual_page == PagedResult(results=[ovs3])
+
+ # order desc, no pagination, no limit
+ actual_page = swh_storage.origin_visit_status_get(
+ origin.url, ov1.visit, order=ListOrder.DESC
+ )
+ assert actual_page.next_page_token is None
+ assert actual_page == PagedResult(results=[ovs3, ovs2, ovs1])
+
+ # order desc, no pagination, limit
+ actual_page = swh_storage.origin_visit_status_get(
+ origin.url, ov1.visit, limit=2, order=ListOrder.DESC
+ )
+ next_page_token = actual_page.next_page_token
+ assert next_page_token is not None
+ assert actual_page.results == [ovs3, ovs2]
+
+ # follow the returned token: only the oldest status remains
+ actual_page = swh_storage.origin_visit_status_get(
+ origin.url, ov1.visit, page_token=next_page_token, order=ListOrder.DESC
+ )
+ assert actual_page.next_page_token is None
+ assert actual_page.results == [ovs1]
+ assert actual_page == PagedResult(results=[ovs1])
+
+ # order desc, pagination, no limit
+ # hand-built token: the page is expected to start strictly before ovs3's date
+ next_page_token = str(ovs3.date)
+ actual_page = swh_storage.origin_visit_status_get(
+ origin.url, ov1.visit, page_token=next_page_token, order=ListOrder.DESC
+ )
+ assert actual_page.next_page_token is None
+ assert actual_page == PagedResult(results=[ovs2, ovs1])
+
+ # order desc, pagination, limit
+ actual_page = swh_storage.origin_visit_status_get(
+ origin.url,
+ ov1.visit,
+ page_token=next_page_token,
+ order=ListOrder.DESC,
+ limit=1,
+ )
+ next_page_token = actual_page.next_page_token
+ assert next_page_token is not None
+ assert actual_page.results == [ovs2]
+
+ # follow the returned token to the last page
+ actual_page = swh_storage.origin_visit_status_get(
+ origin.url, ov1.visit, page_token=next_page_token, order=ListOrder.DESC
+ )
+ assert actual_page == PagedResult(results=[ovs1])
+
def test_origin_visit_status_get_random(self, swh_storage, sample_data):
origins = sample_data.origins[:2]
swh_storage.origin_add(origins)

File Metadata

Mime Type
text/plain
Expires
Jul 3 2025, 8:44 AM (7 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3232375

Event Timeline