Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9342857
D7755.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D7755.diff
View Options
diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2021 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py
--- a/swh/search/in_memory.py
+++ b/swh/search/in_memory.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2021 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -341,7 +341,7 @@
hits = filter(predicate, hits)
- if not url_pattern and not metadata_pattern:
+ if url_pattern is None and metadata_pattern is None:
raise ValueError(
"At least one of url_pattern and metadata_pattern must be provided."
)
diff --git a/swh/search/interface.py b/swh/search/interface.py
--- a/swh/search/interface.py
+++ b/swh/search/interface.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 The Software Heritage developers
+# Copyright (C) 2020-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -86,10 +86,11 @@
Args:
query: Find origins according the queries written as per the
- swh-search query language syntax.
- url_pattern: Part of the URL to search for
- metadata_pattern: Keywords to look for
- (across all the fields of intrinsic_metadata)
+ swh-search query language syntax; if empty, return all origins
+ url_pattern: Part of the URL to search for; if empty and no filter
+ parameters are used, return all origins
+ metadata_pattern: Keywords to look for (across all the fields of
+ intrinsic_metadata)
with_visit: Whether origins with no visits are to be filtered out
visit_types: Only origins having any of the provided visit types
(e.g. git, svn, pypi) will be returned
diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
--- a/swh/search/tests/test_search.py
+++ b/swh/search/tests/test_search.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2021 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -1257,3 +1257,30 @@
self.search.flush()
assert self.search.visit_types_count() == Counter(git=1, hg=2, svn=3)
+
+ def test_origin_search_empty_url_pattern(self):
+ origins = [
+ {"url": "http://foobar.baz", "visit_types": ["git"]},
+ {"url": "http://barbaz.qux", "visit_types": ["svn"]},
+ {"url": "http://qux.quux", "visit_types": ["hg"]},
+ ]
+
+ self.search.origin_update(origins)
+ self.search.flush()
+
+ # should match all origins
+ actual_page = self.search.origin_search(url_pattern="")
+ assert actual_page.next_page_token is None
+ results = [r["url"] for r in actual_page.results]
+ expected_results = [origin["url"] for origin in origins]
+ assert sorted(results) == sorted(expected_results)
+
+ # should match all origins with visit type
+ for origin in origins:
+ actual_page = self.search.origin_search(
+ url_pattern="", visit_types=origin["visit_types"]
+ )
+ assert actual_page.next_page_token is None
+ results = [r["url"] for r in actual_page.results]
+ expected_results = [origin["url"]]
+ assert results == expected_results
diff --git a/swh/search/tests/test_translator.py b/swh/search/tests/test_translator.py
--- a/swh/search/tests/test_translator.py
+++ b/swh/search/tests/test_translator.py
@@ -1,10 +1,8 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import pytest
-
from swh.search.translator import Translator
from swh.search.utils import get_expansion
@@ -16,8 +14,7 @@
def test_empty_query():
query = ""
- with pytest.raises(Exception):
- _test_results(query, {})
+ _test_results(query, {"filters": {"match_all": {}}})
def test_conjunction_operators():
diff --git a/swh/search/translator.py b/swh/search/translator.py
--- a/swh/search/translator.py
+++ b/swh/search/translator.py
@@ -41,14 +41,17 @@
self.query = ""
def parse_query(self, query):
- self.query = query.encode()
- tree = self.parser.parse(self.query)
- self.query_node = tree.root_node
+ if query:
+ self.query = query.encode()
+ tree = self.parser.parse(self.query)
+ self.query_node = tree.root_node
- if self.query_node.has_error:
- raise SearchQuerySyntaxError("Invalid query")
+ if self.query_node.has_error:
+ raise SearchQuerySyntaxError("Invalid query")
- return self._traverse(self.query_node)
+ return self._traverse(self.query_node)
+ else:
+ return {"filters": {"match_all": {}}}
def _traverse(self, node):
if len(node.children) == 3 and node.children[1].type == "filters":
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 1:03 PM (1 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218477
Attached To
D7755: origin_search: Allow to pass empty query or URL pattern parameters
Event Timeline
Log In to Comment