Page Menu | Home | Software Heritage

D2396.diff
No One | Temporary

D2396.diff

diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -145,6 +145,7 @@
'multi_match': {
'query': url_pattern,
'type': 'bool_prefix',
+ 'operator': 'and',
'fields': [
'url.as_you_type',
'url.as_you_type._2gram',
@@ -160,6 +161,7 @@
'query': {
'multi_match': {
'query': metadata_pattern,
+ 'operator': 'and',
'fields': ['intrinsic_metadata.*']
}
},
diff --git a/swh/search/tests/test_in_memory.py b/swh/search/tests/test_in_memory.py
--- a/swh/search/tests/test_in_memory.py
+++ b/swh/search/tests/test_in_memory.py
@@ -27,6 +27,10 @@
def test_origin_intrinsic_metadata_description(self):
pass
+ @pytest.mark.skip('Not implemented in the in-memory search')
+ def test_origin_intrinsic_metadata_all_terms(self):
+ pass
+
@pytest.mark.skip('Not implemented in the in-memory search')
def test_origin_intrinsic_metadata_nested(self):
pass
diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
--- a/swh/search/tests/test_search.py
+++ b/swh/search/tests/test_search.py
@@ -56,6 +56,19 @@
expected_results = ['http://barbaz.qux', 'http://qux.quux']
assert sorted(results) == sorted(expected_results)
+ def test_origin_url_all_terms(self):
+ self.search.origin_update([
+ {'url': 'http://foo.bar/baz'},
+ {'url': 'http://foo.bar/foo.bar'},
+ ])
+ self.search.flush()
+
+ # Only results containing all terms should be returned.
+ results = self.search.origin_search(url_pattern='foo bar baz')
+ assert results == {'next_page_token': None, 'results': [
+ {'url': 'http://foo.bar/baz'},
+ ]}
+
def test_origin_with_visit(self):
self.search.origin_update([
{'url': 'http://foobar.baz', 'has_visits': True},
@@ -114,14 +127,36 @@
assert results == {'next_page_token': None, 'results': [
{'url': 'http://origin2'}]}
- # ES returns both results, because blahblah
results = self.search.origin_search(metadata_pattern='foo bar')
assert results == {'next_page_token': None, 'results': [
- {'url': 'http://origin2'}, {'url': 'http://origin3'}]}
+ {'url': 'http://origin2'}]}
results = self.search.origin_search(metadata_pattern='bar baz')
assert results == {'next_page_token': None, 'results': [
- {'url': 'http://origin3'}, {'url': 'http://origin2'}]}
+ {'url': 'http://origin3'}]}
+
+ def test_origin_intrinsic_metadata_all_terms(self):
+ self.search.origin_update([
+ {
+ 'url': 'http://origin1',
+ 'intrinsic_metadata': {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'description': 'foo bar foo bar',
+ },
+ },
+ {
+ 'url': 'http://origin3',
+ 'intrinsic_metadata': {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'description': 'foo bar baz',
+ }
+ },
+ ])
+ self.search.flush()
+
+ results = self.search.origin_search(metadata_pattern='foo bar baz')
+ assert results == {'next_page_token': None, 'results': [
+ {'url': 'http://origin3'}]}
def test_origin_intrinsic_metadata_nested(self):
self.search.origin_update([
@@ -152,11 +187,11 @@
results = self.search.origin_search(metadata_pattern='foo bar')
assert results == {'next_page_token': None, 'results': [
- {'url': 'http://origin2'}, {'url': 'http://origin3'}]}
+ {'url': 'http://origin2'}]}
results = self.search.origin_search(metadata_pattern='bar baz')
assert results == {'next_page_token': None, 'results': [
- {'url': 'http://origin3'}, {'url': 'http://origin2'}]}
+ {'url': 'http://origin3'}]}
# TODO: add more tests with more codemeta terms
@@ -241,17 +276,14 @@
self.search.origin_search,
metadata_pattern='foo bar baz', count=count)
assert list(results) == [
- {'url': 'http://origin3'},
- {'url': 'http://origin2'},
- {'url': 'http://origin1'}]
+ {'url': 'http://origin3'}]
results = stream_results(
self.search.origin_search,
metadata_pattern='foo bar', count=count)
assert list(results) == [
{'url': 'http://origin2'},
- {'url': 'http://origin3'},
- {'url': 'http://origin1'}]
+ {'url': 'http://origin3'}]
results = stream_results(
self.search.origin_search,

File Metadata

Mime Type
text/plain
Expires
Jul 27 2024, 10:42 PM (11 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216049

Event Timeline