Page Menu | Home | Software Heritage

D2196.id8323.diff
No One | Temporary

D2196.id8323.diff

diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -3,3 +3,4 @@
swh.vault >= 0.0.23
swh.indexer >= 0.0.120
swh.scheduler >= 0.0.31
+swh.search >= 0.0.3
diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py
--- a/swh/web/api/views/origin.py
+++ b/swh/web/api/views/origin.py
@@ -193,7 +193,7 @@
and only the Link header should be used for paginating through
results.
- :param string url_pattern: a string pattern or a regular expression
+ :param string url_pattern: a string pattern
:query int limit: the maximum number of found origins to return
(bounded to 1000)
:query boolean regexp: if true, consider provided pattern as a regular
@@ -218,21 +218,19 @@
:swh_web_api:`origin/search/python/?limit=2`
"""
result = {}
- offset = int(request.query_params.get('offset', '0'))
limit = min(int(request.query_params.get('limit', '70')), 1000)
- regexp = request.query_params.get('regexp', 'false')
+ page_token = request.query_params.get('page_token')
with_visit = request.query_params.get('with_visit', 'false')
- results = api_lookup(service.search_origin, url_pattern, offset, limit,
- bool(strtobool(regexp)), bool(strtobool(with_visit)),
- enrich_fn=_enrich_origin)
+ (results, page_token) = api_lookup(
+ service.search_origin, url_pattern, limit,
+ page_token, bool(strtobool(with_visit)),
+ enrich_fn=_enrich_origin)
- nb_results = len(results)
- if nb_results == limit:
+ if page_token is not None:
query_params = {}
- query_params['offset'] = offset + limit
query_params['limit'] = limit
- query_params['regexp'] = regexp
+ query_params['page_token'] = page_token
result['headers'] = {
'link-next': reverse('api-1-origin-search',
@@ -241,7 +239,7 @@
}
result.update({
- 'results': results
+ 'results': list(results)
})
return result
diff --git a/swh/web/browse/views/origin.py b/swh/web/browse/views/origin.py
--- a/swh/web/browse/views/origin.py
+++ b/swh/web/browse/views/origin.py
@@ -177,17 +177,15 @@
a provided string pattern or match a provided regular expression.
The search is performed in a case insensitive way.
"""
- offset = int(request.GET.get('offset', '0'))
+ # TODO: read page_token from request.GET and pass it to search_origin
limit = min(int(request.GET.get('limit', '50')), 1000)
- regexp = request.GET.get('regexp', 'false')
with_visit = request.GET.get('with_visit', 'false')
url_pattern = url_pattern.replace('///', '\\')
try:
- results = service.search_origin(url_pattern, offset, limit,
- bool(strtobool(regexp)),
- bool(strtobool(with_visit)))
+ (results, page_token) = service.search_origin(
+ url_pattern, limit, with_visit=bool(strtobool(with_visit)))
results = json.dumps(list(results), sort_keys=True, indent=4,
separators=(',', ': '))
diff --git a/swh/web/common/service.py b/swh/web/common/service.py
--- a/swh/web/common/service.py
+++ b/swh/web/common/service.py
@@ -21,6 +21,7 @@
from swh.web.common.origin_visits import get_origin_visit
from swh.web import config
+search = config.search()
storage = config.storage()
vault = config.vault()
idx_storage = config.indexer_storage()
@@ -245,8 +246,7 @@
return map(converters.from_origin, origins)
-def search_origin(url_pattern, offset=0, limit=50, regexp=False,
- with_visit=False):
+def search_origin(url_pattern, limit=50, page_token=None, with_visit=False):
"""Search for origins whose urls contain a provided string pattern
or match a provided regular expression.
@@ -259,9 +259,11 @@
list of origin information as dict.
"""
- origins = storage.origin_search(url_pattern, offset, limit, regexp,
- with_visit)
- return map(converters.from_origin, origins)
+ results = search.origin_search(url_pattern=url_pattern, count=limit,
+ page_token=page_token,
+ with_visit=with_visit)
+ origins = map(converters.from_origin, results['results'])
+ return (origins, results['next_page_token'])
def search_origin_metadata(fulltext, limit=50):
diff --git a/swh/web/config.py b/swh/web/config.py
--- a/swh/web/config.py
+++ b/swh/web/config.py
@@ -10,6 +10,7 @@
from swh.core import config
from swh.indexer.storage import get_indexer_storage
from swh.scheduler import get_scheduler
+from swh.search import get_search
from swh.storage import get_storage
from swh.vault import get_vault
from swh.web import settings
@@ -32,6 +33,13 @@
'timeout': 1,
}
}),
+ 'search': ('dict', {
+ 'cls': 'remote',
+ 'args': {
+ 'url': 'http://127.0.0.1:5010/',
+ 'timeout': 10,
+ },
+ }),
'log_dir': ('string', '/tmp/swh/log'),
'debug': ('bool', False),
'serve_assets': ('bool', False),
@@ -123,6 +131,7 @@
cfg = config.load_named_config(config_file, DEFAULT_CONFIG)
swhweb_config.update(cfg)
config.prepare_folders(swhweb_config, 'log_dir')
+ swhweb_config['search'] = get_search(**swhweb_config['search'])
swhweb_config['storage'] = get_storage(**swhweb_config['storage'])
swhweb_config['vault'] = get_vault(**swhweb_config['vault'])
swhweb_config['indexer_storage'] = \
@@ -132,6 +141,13 @@
return swhweb_config
+def search():
+ """Return the current application's search.
+
+ """
+ return get_config()['search']
+
+
def storage():
"""Return the current application's storage.
diff --git a/swh/web/settings/tests.py b/swh/web/settings/tests.py
--- a/swh/web/settings/tests.py
+++ b/swh/web/settings/tests.py
@@ -95,7 +95,8 @@
})
from swh.web.tests.data import get_tests_data, override_storages # noqa
test_data = get_tests_data()
- override_storages(test_data['storage'], test_data['idx_storage'])
+ override_storages(test_data['storage'], test_data['idx_storage'],
+ test_data['search'])
else:
ALLOWED_HOSTS += ['testserver']
diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py
--- a/swh/web/tests/api/views/test_origin.py
+++ b/swh/web/tests/api/views/test_origin.py
@@ -454,22 +454,6 @@
assert {origin['url'] for origin in rv.data} == expected_origins
-def test_api_origin_search_regexp(api_client):
- expected_origins = {
- 'https://github.com/memononen/libtess2',
- 'repo_with_submodules'
- }
-
- url = reverse('api-1-origin-search',
- url_args={'url_pattern': '(repo|libtess)'},
- query_params={'limit': 10,
- 'regexp': True})
- rv = api_client.get(url)
- assert rv.status_code == 200, rv.data
- assert rv['Content-Type'] == 'application/json'
- assert {origin['url'] for origin in rv.data} == expected_origins
-
-
@pytest.mark.parametrize('limit', [1, 2, 3, 10])
def test_api_origin_search_scroll(api_client, archive_data, limit):
expected_origins = {
@@ -486,8 +470,8 @@
assert {origin['url'] for origin in results} == expected_origins
-def test_api_origin_search_limit(api_client, archive_data):
- archive_data.origin_add([
+def test_api_origin_search_limit(api_client, archive_data, tests_data):
+ tests_data['search'].origin_update([
{'url': 'http://foobar/{}'.format(i)}
for i in range(2000)
])
diff --git a/swh/web/tests/conftest.py b/swh/web/tests/conftest.py
--- a/swh/web/tests/conftest.py
+++ b/swh/web/tests/conftest.py
@@ -95,7 +95,8 @@
data = get_tests_data(reset=True)
# Update swh-web configuration to use the in-memory storages
# instantiated in the tests.data module
- override_storages(data['storage'], data['idx_storage'])
+ override_storages(data['storage'], data['idx_storage'],
+ data['search'])
return data
diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py
--- a/swh/web/tests/data.py
+++ b/swh/web/tests/data.py
@@ -20,6 +20,7 @@
from swh.model.hashutil import hash_to_hex, hash_to_bytes, DEFAULT_ALGORITHMS
from swh.model.identifiers import directory_identifier
from swh.loader.git.from_disk import GitLoaderFromArchive
+from swh.search import get_search
from swh.storage.algos.dir_iterators import dir_iterator
from swh.web import config
from swh.web.browse.utils import (
@@ -244,6 +245,11 @@
# Create indexer storage instance that will be shared by indexers
idx_storage = get_indexer_storage('memory', {})
+ # Create search instance
+ search = get_search('memory', {})
+ search.initialize()
+ search.origin_update({'url': origin['url']}for origin in _TEST_ORIGINS)
+
# Add the empty directory to the test archive
empty_dir_id = directory_identifier({'entries': []})
empty_dir_id_bin = hash_to_bytes(empty_dir_id)
@@ -253,6 +259,7 @@
return {
'storage': storage,
'idx_storage': idx_storage,
+ 'search': search,
'origins': _TEST_ORIGINS,
'contents': contents,
'directories': list(directories),
@@ -311,17 +318,21 @@
return _current_tests_data
-def override_storages(storage, idx_storage):
+def override_storages(storage, idx_storage, search):
"""
Helper function to replace the storages from which archive data
are fetched.
"""
swh_config = config.get_config()
- swh_config.update({'storage': storage})
- service.storage = storage
+ swh_config.update({
+ 'storage': storage,
+ 'indexer_storage': idx_storage,
+ 'search': search,
+ })
- swh_config.update({'indexer_storage': idx_storage})
+ service.storage = storage
service.idx_storage = idx_storage
+ service.search = search
# Implement some special endpoints used to provide input tests data

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 30, 2:49 PM (1 w, 11 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227852

Event Timeline