diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py --- a/swh/search/elasticsearch.py +++ b/swh/search/elasticsearch.py @@ -145,6 +145,7 @@ 'multi_match': { 'query': url_pattern, 'type': 'bool_prefix', + 'operator': 'and', 'fields': [ 'url.as_you_type', 'url.as_you_type._2gram', @@ -160,6 +161,7 @@ 'query': { 'multi_match': { 'query': metadata_pattern, + 'operator': 'and', 'fields': ['intrinsic_metadata.*'] } }, diff --git a/swh/search/tests/test_in_memory.py b/swh/search/tests/test_in_memory.py --- a/swh/search/tests/test_in_memory.py +++ b/swh/search/tests/test_in_memory.py @@ -27,6 +27,10 @@ def test_origin_intrinsic_metadata_description(self): pass + @pytest.mark.skip('Not implemented in the in-memory search') + def test_origin_intrinsic_metadata_all_terms(self): + pass + @pytest.mark.skip('Not implemented in the in-memory search') def test_origin_intrinsic_metadata_nested(self): pass diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py --- a/swh/search/tests/test_search.py +++ b/swh/search/tests/test_search.py @@ -56,6 +56,19 @@ expected_results = ['http://barbaz.qux', 'http://qux.quux'] assert sorted(results) == sorted(expected_results) + def test_origin_url_all_terms(self): + self.search.origin_update([ + {'url': 'http://foo.bar/baz'}, + {'url': 'http://foo.bar/foo.bar'}, + ]) + self.search.flush() + + # Only results containing all terms should be returned. + results = self.search.origin_search(url_pattern='foo bar baz') + assert results == {'next_page_token': None, 'results': [ + {'url': 'http://foo.bar/baz'}, + ]} + def test_origin_with_visit(self): self.search.origin_update([ {'url': 'http://foobar.baz', 'has_visits': True}, @@ -114,14 +127,36 @@ assert results == {'next_page_token': None, 'results': [ {'url': 'http://origin2'}]} - # ES returns both results, because blahblah results = self.search.origin_search(metadata_pattern='foo bar') assert results == {'next_page_token': None, 'results': [ - {'url': 'http://origin2'}, {'url': 'http://origin3'}]} + {'url': 'http://origin2'}]} results = self.search.origin_search(metadata_pattern='bar baz') assert results == {'next_page_token': None, 'results': [ - {'url': 'http://origin3'}, {'url': 'http://origin2'}]} + {'url': 'http://origin3'}]} + + def test_origin_intrinsic_metadata_all_terms(self): + self.search.origin_update([ + { + 'url': 'http://origin1', + 'intrinsic_metadata': { + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'description': 'foo bar foo bar', + }, + }, + { + 'url': 'http://origin3', + 'intrinsic_metadata': { + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'description': 'foo bar baz', + } + }, + ]) + self.search.flush() + + results = self.search.origin_search(metadata_pattern='foo bar baz') + assert results == {'next_page_token': None, 'results': [ + {'url': 'http://origin3'}]} def test_origin_intrinsic_metadata_nested(self): self.search.origin_update([ @@ -152,11 +187,11 @@ results = self.search.origin_search(metadata_pattern='foo bar') assert results == {'next_page_token': None, 'results': [ - {'url': 'http://origin2'}, {'url': 'http://origin3'}]} + {'url': 'http://origin2'}]} results = self.search.origin_search(metadata_pattern='bar baz') assert results == {'next_page_token': None, 'results': [ - {'url': 'http://origin3'}, {'url': 'http://origin2'}]} + {'url': 'http://origin3'}]} # TODO: add more tests with more codemeta terms @@ -241,17 +276,14 @@ self.search.origin_search, metadata_pattern='foo bar baz', count=count) assert list(results) == [ - {'url': 'http://origin3'}, - {'url': 'http://origin2'}, - {'url': 'http://origin1'}] + {'url': 'http://origin3'}] results = stream_results( self.search.origin_search, metadata_pattern='foo bar', count=count) assert list(results) == [ {'url': 'http://origin2'}, - {'url': 'http://origin3'}, - {'url': 'http://origin1'}] + {'url': 'http://origin3'}] results = stream_results( self.search.origin_search,