diff --git a/swh/search/tests/conftest.py b/swh/search/tests/conftest.py index c37cef7..fb72551 100644 --- a/swh/search/tests/conftest.py +++ b/swh/search/tests/conftest.py @@ -1,108 +1,109 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import os import socket import subprocess import time import elasticsearch import pytest + def free_port(): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.bind(('127.0.0.1', 0)) port = sock.getsockname()[1] sock.close() return port def wait_for_peer(addr, port): while True: try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((addr, port)) except ConnectionRefusedError: time.sleep(0.1) else: sock.close() break CONFIG_TEMPLATE = ''' node.name: node-1 path.data: {data} path.logs: {logs} network.host: 127.0.0.1 http.port: {http_port} transport.port: {transport_port} ''' -def _run_elasticsearch(conf_dir, data_dir, logs_dir, http_port, transport_port): + +def _run_elasticsearch( + conf_dir, data_dir, logs_dir, http_port, transport_port): es_home = '/usr/share/elasticsearch' with open(conf_dir + '/elasticsearch.yml', 'w') as fd: fd.write(CONFIG_TEMPLATE.format( data=data_dir, logs=logs_dir, http_port=http_port, transport_port=transport_port)) with open(conf_dir + '/log4j2.properties', 'w') as fd: pass cmd = [ '/usr/share/elasticsearch/jdk/bin/java', '-Des.path.home={}'.format(es_home), '-Des.path.conf={}'.format(conf_dir), '-Des.bundled_jdk=true', '-Dlog4j2.disable.jmx=true', '-cp', '{}/lib/*'.format(es_home), 'org.elasticsearch.bootstrap.Elasticsearch', ] host = '127.0.0.1:{}'.format(http_port) with open(logs_dir + '/output.txt', 'w') as fd: - p = subprocess.Popen(cmd) #, stdout=fd, stderr=fd) + p = subprocess.Popen(cmd) wait_for_peer('127.0.0.1', http_port) client = elasticsearch.Elasticsearch([host]) 
assert client.ping() return p + @pytest.fixture(scope='session') def elasticsearch_session(tmpdir_factory): tmpdir = tmpdir_factory.mktemp('elasticsearch') es_conf = tmpdir.mkdir('conf') http_port = free_port() transport_port = free_port() p = _run_elasticsearch( conf_dir=str(es_conf), data_dir=str(tmpdir.mkdir('data')), logs_dir=str(tmpdir.mkdir('logs')), http_port=http_port, transport_port=transport_port, ) yield '127.0.0.1:{}'.format(http_port) # Check ES didn't stop assert p.returncode is None, p.returncode p.kill() p.wait() -@pytest.fixture(scope='function') +@pytest.fixture(scope='class') def elasticsearch_host(elasticsearch_session): - client = elasticsearch.Elasticsearch([elasticsearch_session]) - client.indices.delete(index='*') yield elasticsearch_session diff --git a/swh/search/tests/test_elasticsearch.py b/swh/search/tests/test_elasticsearch.py new file mode 100644 index 0000000..6ff2015 --- /dev/null +++ b/swh/search/tests/test_elasticsearch.py @@ -0,0 +1,24 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import unittest + +import pytest + +from swh.search.elasticsearch import ElasticSearch +from .test_search import CommonSearchTest + + +class TestElasticsearchSearch(CommonSearchTest, unittest.TestCase): + @pytest.fixture(autouse=True) + def _instantiate_search(self, elasticsearch_host): + self.search = ElasticSearch([elasticsearch_host]) + + def setUp(self): + self.reset() + + def reset(self): + self.search.deinitialize() + self.search.initialize() diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py index 40fe72e..64bc16a 100644 --- a/swh/search/tests/test_search.py +++ b/swh/search/tests/test_search.py @@ -1,171 +1,166 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the
top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import unittest - from hypothesis import given, strategies, settings -import pytest - -from swh.model.model import Origin -from swh.search.elasticsearch import ElasticSearch from swh.search.utils import stream_results -def test_origin_url_unique_word_prefix(elasticsearch_host): - search = ElasticSearch([elasticsearch_host]) - search.initialize() - search.origin_update([ - {'url': 'http://foobar.baz'}, - {'url': 'http://barbaz.qux'}, - {'url': 'http://qux.quux'}, - ]) - - results = search.origin_search(url_substring='foobar') - assert results == {'cursor': None, 'results': [{'url': 'http://foobar.baz'}]} - - results = search.origin_search(url_substring='barb') - assert results == {'cursor': None, 'results': [{'url': 'http://barbaz.qux'}]} - - # 'bar' is part of 'foobar', but is not the beginning of it - results = search.origin_search(url_substring='bar') - assert results == {'cursor': None, 'results': [{'url': 'http://barbaz.qux'}]} - - results = search.origin_search(url_substring='barbaz') - assert results == {'cursor': None, 'results': [{'url': 'http://barbaz.qux'}]} - - results = search.origin_search(url_substring='qu') - assert results['cursor'] == None - assert sorted(res['url'] for res in results['results']) \ - == ['http://barbaz.qux', 'http://qux.quux'] - - results = search.origin_search(url_substring='qux') - assert results['cursor'] == None - assert sorted(res['url'] for res in results['results']) \ - == ['http://barbaz.qux', 'http://qux.quux'] - - -def test_origin_intrinsic_metadata_description(elasticsearch_host): - search = ElasticSearch([elasticsearch_host]) - search.initialize() - search.origin_update([ - { - 'url': 'http://origin1', - 'intrinsic_metadata': {}, - }, - { - 'url': 'http://origin2', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 
'description': 'foo bar', +class CommonSearchTest: + def test_origin_url_unique_word_prefix(self): + self.search.origin_update([ + {'url': 'http://foobar.baz'}, + {'url': 'http://barbaz.qux'}, + {'url': 'http://qux.quux'}, + ]) + + results = self.search.origin_search(url_substring='foobar') + self.assertEqual(results, {'cursor': None, 'results': [ + {'url': 'http://foobar.baz'}]}) + + results = self.search.origin_search(url_substring='barb') + self.assertEqual(results, {'cursor': None, 'results': [ + {'url': 'http://barbaz.qux'}]}) + + # 'bar' is part of 'foobar', but is not the beginning of it + results = self.search.origin_search(url_substring='bar') + self.assertEqual(results, {'cursor': None, 'results': [ + {'url': 'http://barbaz.qux'}]}) + + results = self.search.origin_search(url_substring='barbaz') + self.assertEqual(results, {'cursor': None, 'results': [ + {'url': 'http://barbaz.qux'}]}) + + results = self.search.origin_search(url_substring='qu') + self.assertIsNone(results['cursor']) + self.assertEqual( + sorted(res['url'] for res in results['results']), + ['http://barbaz.qux', 'http://qux.quux']) + + results = self.search.origin_search(url_substring='qux') + self.assertIsNone(results['cursor']) + self.assertEqual( + sorted(res['url'] for res in results['results']), + ['http://barbaz.qux', 'http://qux.quux']) + + def test_origin_intrinsic_metadata_description(self): + self.search.origin_update([ + { + 'url': 'http://origin1', + 'intrinsic_metadata': {}, + }, + { + 'url': 'http://origin2', + 'intrinsic_metadata': { + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'description': 'foo bar', + }, + }, + { + 'url': 'http://origin3', + 'intrinsic_metadata': { + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'description': 'bar baz', + } + }, + ]) + + results = self.search.origin_search(metadata_substring='foo') + self.assertEqual(results, {'cursor': None, 'results': [ + {'url': 'http://origin2'}]}) + + results = 
self.search.origin_search(metadata_substring='foo bar') + self.assertEqual(results, {'cursor': None, 'results': [ + {'url': 'http://origin2'}, {'url': 'http://origin3'}]}) + + results = self.search.origin_search(metadata_substring='bar baz') + self.assertEqual(results, {'cursor': None, 'results': [ + {'url': 'http://origin3'}, {'url': 'http://origin2'}]}) + + def test_origin_intrinsic_metadata_keywords(self): + self.search.origin_update([ + { + 'url': 'http://origin1', + 'intrinsic_metadata': {}, + }, + { + 'url': 'http://origin2', + 'intrinsic_metadata': { + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'keywords': ['foo', 'bar'], + }, + }, + { + 'url': 'http://origin3', + 'intrinsic_metadata': { + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'keywords': ['bar', 'baz'], + } }, - }, - { - 'url': 'http://origin3', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'description': 'bar baz', - } - }, - ]) - - results = search.origin_search(metadata_substring='foo') - assert results == {'cursor': None, 'results': [{'url': 'http://origin2'}]} - - results = search.origin_search(metadata_substring='foo bar') - assert results == {'cursor': None, 'results': [ - {'url': 'http://origin2'}, {'url': 'http://origin3'}]} - - results = search.origin_search(metadata_substring='bar baz') - assert results == {'cursor': None, 'results': [ - {'url': 'http://origin3'}, {'url': 'http://origin2'}]} - - -def test_origin_intrinsic_metadata_keywords(elasticsearch_host): - search = ElasticSearch([elasticsearch_host]) - search.initialize() - search.origin_update([ - { - 'url': 'http://origin1', - 'intrinsic_metadata': {}, - }, - { - 'url': 'http://origin2', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'keywords': ['foo', 'bar'], + ]) + + results = self.search.origin_search(metadata_substring='foo') + self.assertEqual(results, {'cursor': None, 'results': [ + {'url': 
'http://origin2'}]}) + + results = self.search.origin_search(metadata_substring='foo bar') + self.assertEqual(results, {'cursor': None, 'results': [ + {'url': 'http://origin2'}, {'url': 'http://origin3'}]}) + + results = self.search.origin_search(metadata_substring='bar baz') + self.assertEqual(results, {'cursor': None, 'results': [ + {'url': 'http://origin3'}, {'url': 'http://origin2'}]}) + + @settings(deadline=None) + @given(strategies.integers(min_value=1, max_value=4)) + def test_origin_paging(self, count): + self.reset() + self.search.origin_update([ + { + 'url': 'http://origin1', + 'intrinsic_metadata': { + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'keywords': ['foo'], + }, }, - }, - { - 'url': 'http://origin3', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'keywords': ['bar', 'baz'], - } - }, - ]) - - results = search.origin_search(metadata_substring='foo') - assert results == {'cursor': None, 'results': [{'url': 'http://origin2'}]} - - results = search.origin_search(metadata_substring='foo bar') - assert results == {'cursor': None, 'results': [ - {'url': 'http://origin2'}, {'url': 'http://origin3'}]} - - results = search.origin_search(metadata_substring='bar baz') - assert results == {'cursor': None, 'results': [ - {'url': 'http://origin3'}, {'url': 'http://origin2'}]} - - -@settings(deadline=None) -@given(strategies.integers(min_value=1, max_value=4)) -def test_origin_paging(elasticsearch_host, count): - search = ElasticSearch([elasticsearch_host]) - search.deinitialize() - search.initialize() - search.origin_update([ - { - 'url': 'http://origin1', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'keywords': ['foo'], + { + 'url': 'http://origin2', + 'intrinsic_metadata': { + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'keywords': ['foo', 'bar'], + }, }, - }, - { - 'url': 'http://origin2', - 'intrinsic_metadata': { - '@context': 
'https://doi.org/10.5063/schema/codemeta-2.0', - 'keywords': ['foo', 'bar'], + { + 'url': 'http://origin3', + 'intrinsic_metadata': { + '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', + 'keywords': ['foo', 'bar', 'baz'], + } }, - }, - { - 'url': 'http://origin3', - 'intrinsic_metadata': { - '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', - 'keywords': ['foo', 'bar', 'baz'], - } - }, - ]) - - results = stream_results( - search.origin_search, metadata_substring='foo bar baz', count=count) - assert list(results) == [ - {'url': 'http://origin3'}, - {'url': 'http://origin2'}, - {'url': 'http://origin1'}] - - results = stream_results( - search.origin_search, metadata_substring='foo bar', count=count) - assert list(results) == [ - {'url': 'http://origin2'}, - {'url': 'http://origin3'}, - {'url': 'http://origin1'}] - - results = stream_results( - search.origin_search, metadata_substring='foo', count=count) - assert list(results) == [ - {'url': 'http://origin1'}, - {'url': 'http://origin2'}, - {'url': 'http://origin3'}] + ]) + + results = stream_results( + self.search.origin_search, + metadata_substring='foo bar baz', count=count) + self.assertEqual(list(results), [ + {'url': 'http://origin3'}, + {'url': 'http://origin2'}, + {'url': 'http://origin1'}]) + + results = stream_results( + self.search.origin_search, + metadata_substring='foo bar', count=count) + self.assertEqual(list(results), [ + {'url': 'http://origin2'}, + {'url': 'http://origin3'}, + {'url': 'http://origin1'}]) + + results = stream_results( + self.search.origin_search, + metadata_substring='foo', count=count) + self.assertEqual(list(results), [ + {'url': 'http://origin1'}, + {'url': 'http://origin2'}, + {'url': 'http://origin3'}])