Page MenuHomeSoftware Heritage

D7755.diff
No OneTemporary

D7755.diff

diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2021 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py
--- a/swh/search/in_memory.py
+++ b/swh/search/in_memory.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2021 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -341,7 +341,7 @@
hits = filter(predicate, hits)
- if not url_pattern and not metadata_pattern:
+ if url_pattern is None and metadata_pattern is None:
raise ValueError(
"At least one of url_pattern and metadata_pattern must be provided."
)
diff --git a/swh/search/interface.py b/swh/search/interface.py
--- a/swh/search/interface.py
+++ b/swh/search/interface.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 The Software Heritage developers
+# Copyright (C) 2020-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -86,10 +86,11 @@
Args:
query: Find origins according the queries written as per the
- swh-search query language syntax.
- url_pattern: Part of the URL to search for
- metadata_pattern: Keywords to look for
- (across all the fields of intrinsic_metadata)
+ swh-search query language syntax; if empty, return all origins
+ url_pattern: Part of the URL to search for; if empty and no filter
+ parameters are used, return all origins
+ metadata_pattern: Keywords to look for (across all the fields of
+ intrinsic_metadata)
with_visit: Whether origins with no visits are to be filtered out
visit_types: Only origins having any of the provided visit types
(e.g. git, svn, pypi) will be returned
diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
--- a/swh/search/tests/test_search.py
+++ b/swh/search/tests/test_search.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2021 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -1257,3 +1257,30 @@
self.search.flush()
assert self.search.visit_types_count() == Counter(git=1, hg=2, svn=3)
+
+ def test_origin_search_empty_url_pattern(self):
+ origins = [
+ {"url": "http://foobar.baz", "visit_types": ["git"]},
+ {"url": "http://barbaz.qux", "visit_types": ["svn"]},
+ {"url": "http://qux.quux", "visit_types": ["hg"]},
+ ]
+
+ self.search.origin_update(origins)
+ self.search.flush()
+
+ # should match all origins
+ actual_page = self.search.origin_search(url_pattern="")
+ assert actual_page.next_page_token is None
+ results = [r["url"] for r in actual_page.results]
+ expected_results = [origin["url"] for origin in origins]
+ assert sorted(results) == sorted(expected_results)
+
+ # should match all origins with visit type
+ for origin in origins:
+ actual_page = self.search.origin_search(
+ url_pattern="", visit_types=origin["visit_types"]
+ )
+ assert actual_page.next_page_token is None
+ results = [r["url"] for r in actual_page.results]
+ expected_results = [origin["url"]]
+ assert results == expected_results
diff --git a/swh/search/tests/test_translator.py b/swh/search/tests/test_translator.py
--- a/swh/search/tests/test_translator.py
+++ b/swh/search/tests/test_translator.py
@@ -1,10 +1,8 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import pytest
-
from swh.search.translator import Translator
from swh.search.utils import get_expansion
@@ -16,8 +14,7 @@
def test_empty_query():
query = ""
- with pytest.raises(Exception):
- _test_results(query, {})
+ _test_results(query, {"filters": {"match_all": {}}})
def test_conjunction_operators():
diff --git a/swh/search/translator.py b/swh/search/translator.py
--- a/swh/search/translator.py
+++ b/swh/search/translator.py
@@ -41,14 +41,17 @@
self.query = ""
def parse_query(self, query):
- self.query = query.encode()
- tree = self.parser.parse(self.query)
- self.query_node = tree.root_node
+ if query:
+ self.query = query.encode()
+ tree = self.parser.parse(self.query)
+ self.query_node = tree.root_node
- if self.query_node.has_error:
- raise SearchQuerySyntaxError("Invalid query")
+ if self.query_node.has_error:
+ raise SearchQuerySyntaxError("Invalid query")
- return self._traverse(self.query_node)
+ return self._traverse(self.query_node)
+ else:
+ return {"filters": {"match_all": {}}}
def _traverse(self, node):
if len(node.children) == 3 and node.children[1].type == "filters":

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 1:03 PM (1 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218477

Event Timeline