Changeset View
Changeset View
Standalone View
Standalone View
swh/search/tests/test_search.py
Show All 9 Lines | |||||
class CommonSearchTest: | class CommonSearchTest: | ||||
def test_origin_url_unique_word_prefix(self): | def test_origin_url_unique_word_prefix(self): | ||||
self.search.origin_update([ | self.search.origin_update([ | ||||
{'url': 'http://foobar.baz'}, | {'url': 'http://foobar.baz'}, | ||||
{'url': 'http://barbaz.qux'}, | {'url': 'http://barbaz.qux'}, | ||||
{'url': 'http://qux.quux'}, | {'url': 'http://qux.quux'}, | ||||
]) | ]) | ||||
self.search.flush() | |||||
results = self.search.origin_search(url_pattern='foobar') | results = self.search.origin_search(url_pattern='foobar') | ||||
assert results == {'next_page_token': None, 'results': [ | assert results == {'next_page_token': None, 'results': [ | ||||
{'url': 'http://foobar.baz'}]} | {'url': 'http://foobar.baz'}]} | ||||
results = self.search.origin_search(url_pattern='barb') | results = self.search.origin_search(url_pattern='barb') | ||||
assert results == {'next_page_token': None, 'results': [ | assert results == {'next_page_token': None, 'results': [ | ||||
{'url': 'http://barbaz.qux'}]} | {'url': 'http://barbaz.qux'}]} | ||||
# 'bar' is part of 'foobar', but is not the beginning of it | # 'bar' is part of 'foobar', but is not the beginning of it | ||||
results = self.search.origin_search(url_pattern='bar') | results = self.search.origin_search(url_pattern='bar') | ||||
assert results == {'next_page_token': None, 'results': [ | assert results == {'next_page_token': None, 'results': [ | ||||
{'url': 'http://barbaz.qux'}]} | {'url': 'http://barbaz.qux'}]} | ||||
results = self.search.origin_search(url_pattern='barbaz') | results = self.search.origin_search(url_pattern='barbaz') | ||||
assert results == {'next_page_token': None, 'results': [ | assert results == {'next_page_token': None, 'results': [ | ||||
{'url': 'http://barbaz.qux'}]} | {'url': 'http://barbaz.qux'}]} | ||||
def test_origin_url_unique_word_prefix_multiple_results(self): | def test_origin_url_unique_word_prefix_multiple_results(self): | ||||
self.search.origin_update([ | self.search.origin_update([ | ||||
{'url': 'http://foobar.baz'}, | {'url': 'http://foobar.baz'}, | ||||
{'url': 'http://barbaz.qux'}, | {'url': 'http://barbaz.qux'}, | ||||
{'url': 'http://qux.quux'}, | {'url': 'http://qux.quux'}, | ||||
]) | ]) | ||||
self.search.flush() | |||||
results = self.search.origin_search(url_pattern='qu') | results = self.search.origin_search(url_pattern='qu') | ||||
assert results['next_page_token'] is None | assert results['next_page_token'] is None | ||||
results = [res['url'] for res in results['results']] | results = [res['url'] for res in results['results']] | ||||
expected_results = ['http://qux.quux', 'http://barbaz.qux'] | expected_results = ['http://qux.quux', 'http://barbaz.qux'] | ||||
assert sorted(results) == sorted(expected_results) | assert sorted(results) == sorted(expected_results) | ||||
results = self.search.origin_search(url_pattern='qux') | results = self.search.origin_search(url_pattern='qux') | ||||
assert results['next_page_token'] is None | assert results['next_page_token'] is None | ||||
results = [res['url'] for res in results['results']] | results = [res['url'] for res in results['results']] | ||||
expected_results = ['http://barbaz.qux', 'http://qux.quux'] | expected_results = ['http://barbaz.qux', 'http://qux.quux'] | ||||
assert sorted(results) == sorted(expected_results) | assert sorted(results) == sorted(expected_results) | ||||
def test_origin_with_visit(self): | def test_origin_with_visit(self): | ||||
self.search.origin_update([ | self.search.origin_update([ | ||||
{'url': 'http://foobar.baz', 'has_visits': True}, | {'url': 'http://foobar.baz', 'has_visits': True}, | ||||
]) | ]) | ||||
self.search.flush() | |||||
results = self.search.origin_search( | results = self.search.origin_search( | ||||
url_pattern='foobar', with_visit=True) | url_pattern='foobar', with_visit=True) | ||||
assert results == {'next_page_token': None, 'results': [ | assert results == {'next_page_token': None, 'results': [ | ||||
{'url': 'http://foobar.baz'}]} | {'url': 'http://foobar.baz'}]} | ||||
def test_origin_with_visit_added(self): | def test_origin_with_visit_added(self): | ||||
self.search.origin_update([ | self.search.origin_update([ | ||||
{'url': 'http://foobar.baz'}, | {'url': 'http://foobar.baz'}, | ||||
]) | ]) | ||||
self.search.flush() | |||||
results = self.search.origin_search( | results = self.search.origin_search( | ||||
url_pattern='foobar', with_visit=True) | url_pattern='foobar', with_visit=True) | ||||
assert results == {'next_page_token': None, 'results': []} | assert results == {'next_page_token': None, 'results': []} | ||||
self.search.origin_update([ | self.search.origin_update([ | ||||
{'url': 'http://foobar.baz', 'has_visits': True}, | {'url': 'http://foobar.baz', 'has_visits': True}, | ||||
]) | ]) | ||||
self.search.flush() | |||||
results = self.search.origin_search( | results = self.search.origin_search( | ||||
url_pattern='foobar', with_visit=True) | url_pattern='foobar', with_visit=True) | ||||
assert results == {'next_page_token': None, 'results': [ | assert results == {'next_page_token': None, 'results': [ | ||||
{'url': 'http://foobar.baz'}]} | {'url': 'http://foobar.baz'}]} | ||||
def test_origin_intrinsic_metadata_description(self): | def test_origin_intrinsic_metadata_description(self): | ||||
self.search.origin_update([ | self.search.origin_update([ | ||||
Show All 11 Lines | def test_origin_intrinsic_metadata_description(self): | ||||
{ | { | ||||
'url': 'http://origin3', | 'url': 'http://origin3', | ||||
'intrinsic_metadata': { | 'intrinsic_metadata': { | ||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | ||||
'description': 'bar baz', | 'description': 'bar baz', | ||||
} | } | ||||
}, | }, | ||||
]) | ]) | ||||
self.search.flush() | |||||
results = self.search.origin_search(metadata_pattern='foo') | results = self.search.origin_search(metadata_pattern='foo') | ||||
assert results == {'next_page_token': None, 'results': [ | assert results == {'next_page_token': None, 'results': [ | ||||
{'url': 'http://origin2'}]} | {'url': 'http://origin2'}]} | ||||
# ES returns both results, because blahblah | # ES returns both results, because blahblah | ||||
results = self.search.origin_search(metadata_pattern='foo bar') | results = self.search.origin_search(metadata_pattern='foo bar') | ||||
assert results == {'next_page_token': None, 'results': [ | assert results == {'next_page_token': None, 'results': [ | ||||
Show All 19 Lines | def test_origin_intrinsic_metadata_nested(self): | ||||
{ | { | ||||
'url': 'http://origin3', | 'url': 'http://origin3', | ||||
'intrinsic_metadata': { | 'intrinsic_metadata': { | ||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | ||||
'keywords': ['bar', 'baz'], | 'keywords': ['bar', 'baz'], | ||||
} | } | ||||
}, | }, | ||||
]) | ]) | ||||
self.search.flush() | |||||
results = self.search.origin_search(metadata_pattern='foo') | results = self.search.origin_search(metadata_pattern='foo') | ||||
assert results == {'next_page_token': None, 'results': [ | assert results == {'next_page_token': None, 'results': [ | ||||
{'url': 'http://origin2'}]} | {'url': 'http://origin2'}]} | ||||
results = self.search.origin_search(metadata_pattern='foo bar') | results = self.search.origin_search(metadata_pattern='foo bar') | ||||
assert results == {'next_page_token': None, 'results': [ | assert results == {'next_page_token': None, 'results': [ | ||||
{'url': 'http://origin2'}, {'url': 'http://origin3'}]} | {'url': 'http://origin2'}, {'url': 'http://origin3'}]} | ||||
Show All 11 Lines | class CommonSearchTest: | ||||
def test_origin_url_paging(self, count): | def test_origin_url_paging(self, count): | ||||
# TODO: no hypothesis | # TODO: no hypothesis | ||||
self.reset() | self.reset() | ||||
self.search.origin_update([ | self.search.origin_update([ | ||||
{'url': 'http://origin1/foo'}, | {'url': 'http://origin1/foo'}, | ||||
{'url': 'http://origin2/foo/bar'}, | {'url': 'http://origin2/foo/bar'}, | ||||
{'url': 'http://origin3/foo/bar/baz'}, | {'url': 'http://origin3/foo/bar/baz'}, | ||||
]) | ]) | ||||
self.search.flush() | |||||
results = stream_results( | results = stream_results( | ||||
self.search.origin_search, | self.search.origin_search, | ||||
url_pattern='foo bar baz', count=count) | url_pattern='foo bar baz', count=count) | ||||
results = [res['url'] for res in results] | results = [res['url'] for res in results] | ||||
expected_results = [ | expected_results = [ | ||||
'http://origin3/foo/bar/baz', | 'http://origin3/foo/bar/baz', | ||||
] | ] | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | def test_origin_intrinsic_metadata_paging(self, count): | ||||
{ | { | ||||
'url': 'http://origin3', | 'url': 'http://origin3', | ||||
'intrinsic_metadata': { | 'intrinsic_metadata': { | ||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | ||||
'keywords': ['foo', 'bar', 'baz'], | 'keywords': ['foo', 'bar', 'baz'], | ||||
} | } | ||||
}, | }, | ||||
]) | ]) | ||||
self.search.flush() | |||||
results = stream_results( | results = stream_results( | ||||
self.search.origin_search, | self.search.origin_search, | ||||
metadata_pattern='foo bar baz', count=count) | metadata_pattern='foo bar baz', count=count) | ||||
assert list(results) == [ | assert list(results) == [ | ||||
{'url': 'http://origin3'}, | {'url': 'http://origin3'}, | ||||
{'url': 'http://origin2'}, | {'url': 'http://origin2'}, | ||||
{'url': 'http://origin1'}] | {'url': 'http://origin1'}] | ||||
Show All 16 Lines |