Summary of this patch (free text placed before the first "diff --git" header):

  * swh/search/translator.py: the "visited" boolean filter no longer maps
    "visited = false" to a bare term query on has_visits=False. It now emits
    a bool/should of that term query OR "has_visits field does not exist",
    because (per the in-code comment) non-visited origins will typically not
    have "has_visits" set at all. "visited = true" still yields a plain term
    query on has_visits=True.
  * swh/search/tests/test_translator.py: adds test_visited covering both
    translations, and switches test_conjunction_op_precedence_override to
    "visited = true" so its expected output remains a simple term query.
  * swh/search/tests/test_elasticsearch.py: adds test_search_ql_visited, which
    indexes one origin with has_visits=True, one without the field and one
    with has_visits=False, then checks the results of "visited = true",
    "visited = false", their conjunction, and "visited = false" combined
    with with_visit=True.

NOTE(review): line breaks and leading indentation below were reconstructed
from a whitespace-collapsed copy of the patch; the per-hunk line counts match
the @@ headers, but confirm the exact indentation against the repository
before applying.

diff --git a/swh/search/tests/test_elasticsearch.py b/swh/search/tests/test_elasticsearch.py
--- a/swh/search/tests/test_elasticsearch.py
+++ b/swh/search/tests/test_elasticsearch.py
@@ -184,6 +184,40 @@
             "http://foobar.3.com",
         }
 
+    def test_search_ql_visited(self):
+        self.search.origin_update(
+            [
+                {
+                    "url": "http://foobar.1.com",
+                    "has_visits": True,
+                    "nb_visits": 1,
+                    "last_visit_date": now_minus_5_hours,
+                    "last_eventful_visit_date": now_minus_5_hours,
+                },
+                {"url": "http://foobar.2.com",},
+                {"url": "http://foobar.3.com", "has_visits": False,},
+            ]
+        )
+        self.search.flush()
+
+        assert {
+            r["url"] for r in self.search.origin_search(query="visited = true").results
+        } == {"http://foobar.1.com"}
+        assert {
+            r["url"] for r in self.search.origin_search(query="visited = false").results
+        } == {"http://foobar.2.com", "http://foobar.3.com"}
+
+        assert (
+            self.search.origin_search(
+                query="visited = true and visited = false"
+            ).results
+            == []
+        )
+        assert (
+            self.search.origin_search(query="visited = false", with_visit=True).results
+            == []
+        )
+
     def test_query_syntax_error(self):
         self.search.origin_update(ORIGINS)
         self.search.flush()
diff --git a/swh/search/tests/test_translator.py b/swh/search/tests/test_translator.py
--- a/swh/search/tests/test_translator.py
+++ b/swh/search/tests/test_translator.py
@@ -42,8 +42,29 @@
     _test_results(query, expected)
 
 
+def test_visited():
+    query = "visited = true"
+    expected = {
+        "filters": {"term": {"has_visits": True}},
+    }
+    _test_results(query, expected)
+
+    query = "visited = false"
+    expected = {
+        "filters": {
+            "bool": {
+                "should": [
+                    {"term": {"has_visits": False}},
+                    {"bool": {"must_not": {"exists": {"field": "has_visits"}}}},
+                ]
+            }
+        }
+    }
+    _test_results(query, expected)
+
+
 def test_conjunction_op_precedence_override():
-    query = "(visited = false or visits > 2) and visits < 5"
+    query = "(visited = true or visits > 2) and visits < 5"
     expected = {
         "filters": {
             "bool": {
@@ -51,7 +72,7 @@
                     {
                         "bool": {
                             "should": [
-                                {"term": {"has_visits": False}},
+                                {"term": {"has_visits": True}},
                                 {"range": {"nb_visits": {"gt": 2}}},
                             ]
                         }
diff --git a/swh/search/translator.py b/swh/search/translator.py
--- a/swh/search/translator.py
+++ b/swh/search/translator.py
@@ -173,7 +173,23 @@
 
         if category == "booleanFilter":
             if name == "visited":
-                return {"term": {"has_visits": value == "true"}}
+                if value == "true":
+                    return {"term": {"has_visits": True}}
+                else:
+                    # non-visited origins will typically not have "has_visits" set
+                    # at all
+                    return {
+                        "bool": {
+                            "should": [
+                                {"term": {"has_visits": False}},
+                                {
+                                    "bool": {
+                                        "must_not": {"exists": {"field": "has_visits"}}
+                                    }
+                                },
+                            ]
+                        }
+                    }
 
         if category == "numericFilter":
             if name == "visits":