Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F6930386
D2396.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D2396.diff
View Options
diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -145,6 +145,7 @@
'multi_match': {
'query': url_pattern,
'type': 'bool_prefix',
+ 'operator': 'and',
'fields': [
'url.as_you_type',
'url.as_you_type._2gram',
@@ -160,6 +161,7 @@
'query': {
'multi_match': {
'query': metadata_pattern,
+ 'operator': 'and',
'fields': ['intrinsic_metadata.*']
}
},
diff --git a/swh/search/tests/test_in_memory.py b/swh/search/tests/test_in_memory.py
--- a/swh/search/tests/test_in_memory.py
+++ b/swh/search/tests/test_in_memory.py
@@ -27,6 +27,10 @@
def test_origin_intrinsic_metadata_description(self):
pass
+ @pytest.mark.skip('Not implemented in the in-memory search')
+ def test_origin_intrinsic_metadata_all_terms(self):
+ pass
+
@pytest.mark.skip('Not implemented in the in-memory search')
def test_origin_intrinsic_metadata_nested(self):
pass
diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
--- a/swh/search/tests/test_search.py
+++ b/swh/search/tests/test_search.py
@@ -56,6 +56,19 @@
expected_results = ['http://barbaz.qux', 'http://qux.quux']
assert sorted(results) == sorted(expected_results)
+ def test_origin_url_all_terms(self):
+ self.search.origin_update([
+ {'url': 'http://foo.bar/baz'},
+ {'url': 'http://foo.bar/foo.bar'},
+ ])
+ self.search.flush()
+
+ # Only results containing all terms should be returned.
+ results = self.search.origin_search(url_pattern='foo bar baz')
+ assert results == {'next_page_token': None, 'results': [
+ {'url': 'http://foo.bar/baz'},
+ ]}
+
def test_origin_with_visit(self):
self.search.origin_update([
{'url': 'http://foobar.baz', 'has_visits': True},
@@ -114,14 +127,36 @@
assert results == {'next_page_token': None, 'results': [
{'url': 'http://origin2'}]}
- # ES returns both results, because blahblah
results = self.search.origin_search(metadata_pattern='foo bar')
assert results == {'next_page_token': None, 'results': [
- {'url': 'http://origin2'}, {'url': 'http://origin3'}]}
+ {'url': 'http://origin2'}]}
results = self.search.origin_search(metadata_pattern='bar baz')
assert results == {'next_page_token': None, 'results': [
- {'url': 'http://origin3'}, {'url': 'http://origin2'}]}
+ {'url': 'http://origin3'}]}
+
+ def test_origin_intrinsic_metadata_all_terms(self):
+ self.search.origin_update([
+ {
+ 'url': 'http://origin1',
+ 'intrinsic_metadata': {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'description': 'foo bar foo bar',
+ },
+ },
+ {
+ 'url': 'http://origin3',
+ 'intrinsic_metadata': {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'description': 'foo bar baz',
+ }
+ },
+ ])
+ self.search.flush()
+
+ results = self.search.origin_search(metadata_pattern='foo bar baz')
+ assert results == {'next_page_token': None, 'results': [
+ {'url': 'http://origin3'}]}
def test_origin_intrinsic_metadata_nested(self):
self.search.origin_update([
@@ -152,11 +187,11 @@
results = self.search.origin_search(metadata_pattern='foo bar')
assert results == {'next_page_token': None, 'results': [
- {'url': 'http://origin2'}, {'url': 'http://origin3'}]}
+ {'url': 'http://origin2'}]}
results = self.search.origin_search(metadata_pattern='bar baz')
assert results == {'next_page_token': None, 'results': [
- {'url': 'http://origin3'}, {'url': 'http://origin2'}]}
+ {'url': 'http://origin3'}]}
# TODO: add more tests with more codemeta terms
@@ -241,17 +276,14 @@
self.search.origin_search,
metadata_pattern='foo bar baz', count=count)
assert list(results) == [
- {'url': 'http://origin3'},
- {'url': 'http://origin2'},
- {'url': 'http://origin1'}]
+ {'url': 'http://origin3'}]
results = stream_results(
self.search.origin_search,
metadata_pattern='foo bar', count=count)
assert list(results) == [
{'url': 'http://origin2'},
- {'url': 'http://origin3'},
- {'url': 'http://origin1'}]
+ {'url': 'http://origin3'}]
results = stream_results(
self.search.origin_search,
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Jul 27 2024, 10:42 PM (11 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216049
Attached To
D2396: Only return results where all terms match.
Event Timeline
Log In to Comment