diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py --- a/swh/search/elasticsearch.py +++ b/swh/search/elasticsearch.py @@ -245,6 +245,7 @@ with_visit: bool = False, visit_types: Optional[List[str]] = None, nb_visit: int = None, + last_visit_date: Optional[str] = None, page_token: Optional[str] = None, limit: int = 50, ) -> PagedResult[MinimalOriginDict]: @@ -300,6 +301,8 @@ query_clauses.append({"term": {"has_visits": True,}}) if nb_visit: query_clauses.append({"term": {"nb_visit": nb_visit,}}) + if last_visit_date: + query_clauses.append({"term": {"last_visit_date": last_visit_date,}}) if visit_types is not None: query_clauses.append({"terms": {"visit_types": visit_types}}) diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py --- a/swh/search/in_memory.py +++ b/swh/search/in_memory.py @@ -89,6 +89,7 @@ visit_types: Optional[List[str]] = None, page_token: Optional[str] = None, nb_visit: int = None, + last_visit_date: Optional[str] = None, limit: int = 50, ) -> PagedResult[MinimalOriginDict]: hits: Iterator[Dict[str, Any]] = ( @@ -142,6 +143,8 @@ hits = filter(lambda o: o.get("has_visits"), hits) if nb_visit: hits = filter(lambda o: o.get("nb_visit") == nb_visit, hits) + if last_visit_date: + hits = filter(lambda o: o.get("last_visit_date") == last_visit_date, hits) if visit_types is not None: visit_types_set = set(visit_types) diff --git a/swh/search/interface.py b/swh/search/interface.py --- a/swh/search/interface.py +++ b/swh/search/interface.py @@ -60,6 +60,7 @@ visit_types: Optional[List[str]] = None, page_token: Optional[str] = None, nb_visit: int = None, + last_visit_date: Optional[str] = None, limit: int = 50, ) -> PagedResult[MinimalOriginDict]: """Searches for origins matching the `url_pattern`. 
diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
--- a/swh/search/tests/test_search.py
+++ b/swh/search/tests/test_search.py
@@ -212,6 +212,42 @@
         with pytest.raises(AssertionError):
             _check_nb_visit(3)
 
+    def test_origin_last_visit_date_update_search(self):
+        """Check that `last_visit_date` only ever moves forward.
+
+        Indexing an older date after a newer one must leave the origin
+        searchable by the newer date only.
+        """
+        origin_url = "http://foobar.baz"
+        self.search.origin_update([{"url": origin_url}])
+        self.search.flush()
+
+        def _update_last_visit_date(last_visit_date):
+            self.search.origin_update(
+                [{"url": origin_url, "last_visit_date": last_visit_date}]
+            )
+            self.search.flush()
+
+        def _search_by_last_visit_date(last_visit_date):
+            actual_page = self.search.origin_search(
+                url_pattern=origin_url, last_visit_date=last_visit_date,
+            )
+            assert actual_page.next_page_token is None
+            return [r["url"] for r in actual_page.results]
+
+        newer_date = "2021-06-09T10:47:20.069684+00:00"
+        older_date = "2015-06-09T10:47:20.069684+00:00"
+
+        _update_last_visit_date(newer_date)
+        assert _search_by_last_visit_date(newer_date) == [origin_url]
+
+        # An older date must be ignored: matching on it directly, rather than
+        # wrapping asserts in pytest.raises(AssertionError), keeps an unrelated
+        # assertion failure from making the test pass.
+        _update_last_visit_date(older_date)
+        assert _search_by_last_visit_date(newer_date) == [origin_url]
+        assert _search_by_last_visit_date(older_date) == []
+
     def test_origin_update_with_no_visit_types(self):
         """
         Update an origin with visit types first then with no visit types,