diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py --- a/swh/search/elasticsearch.py +++ b/swh/search/elasticsearch.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py --- a/swh/search/in_memory.py +++ b/swh/search/in_memory.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -341,7 +341,7 @@ hits = filter(predicate, hits) - if not url_pattern and not metadata_pattern: + if url_pattern is None and metadata_pattern is None: raise ValueError( "At least one of url_pattern and metadata_pattern must be provided." ) diff --git a/swh/search/interface.py b/swh/search/interface.py --- a/swh/search/interface.py +++ b/swh/search/interface.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2021 The Software Heritage developers +# Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -86,10 +86,11 @@ Args: query: Find origins according the queries written as per the - swh-search query language syntax. 
- url_pattern: Part of the URL to search for - metadata_pattern: Keywords to look for - (across all the fields of intrinsic_metadata) + swh-search query language syntax; if empty, return all origins + url_pattern: Part of the URL to search for; if empty and no filter + parameters are used, return all origins + metadata_pattern: Keywords to look for (across all the fields of + intrinsic_metadata) with_visit: Whether origins with no visits are to be filtered out visit_types: Only origins having any of the provided visit types (e.g. git, svn, pypi) will be returned diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py --- a/swh/search/tests/test_search.py +++ b/swh/search/tests/test_search.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -1257,3 +1257,30 @@ self.search.flush() assert self.search.visit_types_count() == Counter(git=1, hg=2, svn=3) + + def test_origin_search_empty_url_pattern(self): + origins = [ + {"url": "http://foobar.baz", "visit_types": ["git"]}, + {"url": "http://barbaz.qux", "visit_types": ["svn"]}, + {"url": "http://qux.quux", "visit_types": ["hg"]}, + ] + + self.search.origin_update(origins) + self.search.flush() + + # should match all origins + actual_page = self.search.origin_search(url_pattern="") + assert actual_page.next_page_token is None + results = [r["url"] for r in actual_page.results] + expected_results = [origin["url"] for origin in origins] + assert sorted(results) == sorted(expected_results) + + # should match all origins with visit type + for origin in origins: + actual_page = self.search.origin_search( + url_pattern="", visit_types=origin["visit_types"] + ) + assert actual_page.next_page_token is None + results = [r["url"] for r in 
actual_page.results] + expected_results = [origin["url"]] + assert results == expected_results diff --git a/swh/search/tests/test_translator.py b/swh/search/tests/test_translator.py --- a/swh/search/tests/test_translator.py +++ b/swh/search/tests/test_translator.py @@ -1,10 +1,8 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import pytest - from swh.search.translator import Translator from swh.search.utils import get_expansion @@ -16,8 +14,7 @@ def test_empty_query(): query = "" - with pytest.raises(Exception): - _test_results(query, {}) + _test_results(query, {"filters": {"match_all": {}}}) def test_conjunction_operators(): diff --git a/swh/search/translator.py b/swh/search/translator.py --- a/swh/search/translator.py +++ b/swh/search/translator.py @@ -41,14 +41,17 @@ self.query = "" def parse_query(self, query): - self.query = query.encode() - tree = self.parser.parse(self.query) - self.query_node = tree.root_node + if query: + self.query = query.encode() + tree = self.parser.parse(self.query) + self.query_node = tree.root_node - if self.query_node.has_error: - raise SearchQuerySyntaxError("Invalid query") + if self.query_node.has_error: + raise SearchQuerySyntaxError("Invalid query") - return self._traverse(self.query_node) + return self._traverse(self.query_node) + else: + return {"filters": {"match_all": {}}} def _traverse(self, node): if len(node.children) == 3 and node.children[1].type == "filters":