Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7163760
D2196.id8323.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
9 KB
Subscribers
None
D2196.id8323.diff
View Options
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -3,3 +3,4 @@
swh.vault >= 0.0.23
swh.indexer >= 0.0.120
swh.scheduler >= 0.0.31
+swh.search >= 0.0.3
diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py
--- a/swh/web/api/views/origin.py
+++ b/swh/web/api/views/origin.py
@@ -193,7 +193,7 @@
and only the Link header should be used for paginating through
results.
- :param string url_pattern: a string pattern or a regular expression
+ :param string url_pattern: a string pattern
:query int limit: the maximum number of found origins to return
(bounded to 1000)
:query boolean regexp: if true, consider provided pattern as a regular
@@ -218,21 +218,19 @@
:swh_web_api:`origin/search/python/?limit=2`
"""
result = {}
- offset = int(request.query_params.get('offset', '0'))
limit = min(int(request.query_params.get('limit', '70')), 1000)
- regexp = request.query_params.get('regexp', 'false')
+ page_token = request.query_params.get('page_token')
with_visit = request.query_params.get('with_visit', 'false')
- results = api_lookup(service.search_origin, url_pattern, offset, limit,
- bool(strtobool(regexp)), bool(strtobool(with_visit)),
- enrich_fn=_enrich_origin)
+ (results, page_token) = api_lookup(
+ service.search_origin, url_pattern, limit,
+ page_token, bool(strtobool(with_visit)),
+ enrich_fn=_enrich_origin)
- nb_results = len(results)
- if nb_results == limit:
+ if page_token is not None:
query_params = {}
- query_params['offset'] = offset + limit
query_params['limit'] = limit
- query_params['regexp'] = regexp
+ query_params['page_token'] = page_token
result['headers'] = {
'link-next': reverse('api-1-origin-search',
@@ -241,7 +239,7 @@
}
result.update({
- 'results': results
+ 'results': list(results)
})
return result
diff --git a/swh/web/browse/views/origin.py b/swh/web/browse/views/origin.py
--- a/swh/web/browse/views/origin.py
+++ b/swh/web/browse/views/origin.py
@@ -177,17 +177,15 @@
a provided string pattern or match a provided regular expression.
The search is performed in a case insensitive way.
"""
- offset = int(request.GET.get('offset', '0'))
+ # TODO: accept a page_token query parameter and pass it to search_origin
limit = min(int(request.GET.get('limit', '50')), 1000)
- regexp = request.GET.get('regexp', 'false')
with_visit = request.GET.get('with_visit', 'false')
url_pattern = url_pattern.replace('///', '\\')
try:
- results = service.search_origin(url_pattern, offset, limit,
- bool(strtobool(regexp)),
- bool(strtobool(with_visit)))
+ (results, page_token) = service.search_origin(
+ url_pattern, limit, with_visit=bool(strtobool(with_visit)))
results = json.dumps(list(results), sort_keys=True, indent=4,
separators=(',', ': '))
diff --git a/swh/web/common/service.py b/swh/web/common/service.py
--- a/swh/web/common/service.py
+++ b/swh/web/common/service.py
@@ -21,6 +21,7 @@
from swh.web.common.origin_visits import get_origin_visit
from swh.web import config
+search = config.search()
storage = config.storage()
vault = config.vault()
idx_storage = config.indexer_storage()
@@ -245,8 +246,7 @@
return map(converters.from_origin, origins)
-def search_origin(url_pattern, offset=0, limit=50, regexp=False,
- with_visit=False):
+def search_origin(url_pattern, limit=50, page_token=None, with_visit=False):
"""Search for origins whose urls contain a provided string pattern
or match a provided regular expression.
@@ -259,9 +259,11 @@
list of origin information as dict.
"""
- origins = storage.origin_search(url_pattern, offset, limit, regexp,
- with_visit)
- return map(converters.from_origin, origins)
+ results = search.origin_search(url_pattern=url_pattern, count=limit,
+ page_token=page_token,
+ with_visit=with_visit)
+ origins = map(converters.from_origin, results['results'])
+ return (origins, results['next_page_token'])
def search_origin_metadata(fulltext, limit=50):
diff --git a/swh/web/config.py b/swh/web/config.py
--- a/swh/web/config.py
+++ b/swh/web/config.py
@@ -10,6 +10,7 @@
from swh.core import config
from swh.indexer.storage import get_indexer_storage
from swh.scheduler import get_scheduler
+from swh.search import get_search
from swh.storage import get_storage
from swh.vault import get_vault
from swh.web import settings
@@ -32,6 +33,13 @@
'timeout': 1,
}
}),
+ 'search': ('dict', {
+ 'cls': 'remote',
+ 'args': {
+ 'url': 'http://127.0.0.1:5010/',
+ 'timeout': 10,
+ },
+ }),
'log_dir': ('string', '/tmp/swh/log'),
'debug': ('bool', False),
'serve_assets': ('bool', False),
@@ -123,6 +131,7 @@
cfg = config.load_named_config(config_file, DEFAULT_CONFIG)
swhweb_config.update(cfg)
config.prepare_folders(swhweb_config, 'log_dir')
+ swhweb_config['search'] = get_search(**swhweb_config['search'])
swhweb_config['storage'] = get_storage(**swhweb_config['storage'])
swhweb_config['vault'] = get_vault(**swhweb_config['vault'])
swhweb_config['indexer_storage'] = \
@@ -132,6 +141,13 @@
return swhweb_config
+def search():
+ """Return the current application's search.
+
+ """
+ return get_config()['search']
+
+
def storage():
"""Return the current application's storage.
diff --git a/swh/web/settings/tests.py b/swh/web/settings/tests.py
--- a/swh/web/settings/tests.py
+++ b/swh/web/settings/tests.py
@@ -95,7 +95,8 @@
})
from swh.web.tests.data import get_tests_data, override_storages # noqa
test_data = get_tests_data()
- override_storages(test_data['storage'], test_data['idx_storage'])
+ override_storages(test_data['storage'], test_data['idx_storage'],
+ test_data['search'])
else:
ALLOWED_HOSTS += ['testserver']
diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py
--- a/swh/web/tests/api/views/test_origin.py
+++ b/swh/web/tests/api/views/test_origin.py
@@ -454,22 +454,6 @@
assert {origin['url'] for origin in rv.data} == expected_origins
-def test_api_origin_search_regexp(api_client):
- expected_origins = {
- 'https://github.com/memononen/libtess2',
- 'repo_with_submodules'
- }
-
- url = reverse('api-1-origin-search',
- url_args={'url_pattern': '(repo|libtess)'},
- query_params={'limit': 10,
- 'regexp': True})
- rv = api_client.get(url)
- assert rv.status_code == 200, rv.data
- assert rv['Content-Type'] == 'application/json'
- assert {origin['url'] for origin in rv.data} == expected_origins
-
-
@pytest.mark.parametrize('limit', [1, 2, 3, 10])
def test_api_origin_search_scroll(api_client, archive_data, limit):
expected_origins = {
@@ -486,8 +470,8 @@
assert {origin['url'] for origin in results} == expected_origins
-def test_api_origin_search_limit(api_client, archive_data):
- archive_data.origin_add([
+def test_api_origin_search_limit(api_client, archive_data, tests_data):
+ tests_data['search'].origin_update([
{'url': 'http://foobar/{}'.format(i)}
for i in range(2000)
])
diff --git a/swh/web/tests/conftest.py b/swh/web/tests/conftest.py
--- a/swh/web/tests/conftest.py
+++ b/swh/web/tests/conftest.py
@@ -95,7 +95,8 @@
data = get_tests_data(reset=True)
# Update swh-web configuration to use the in-memory storages
# instantiated in the tests.data module
- override_storages(data['storage'], data['idx_storage'])
+ override_storages(data['storage'], data['idx_storage'],
+ data['search'])
return data
diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py
--- a/swh/web/tests/data.py
+++ b/swh/web/tests/data.py
@@ -20,6 +20,7 @@
from swh.model.hashutil import hash_to_hex, hash_to_bytes, DEFAULT_ALGORITHMS
from swh.model.identifiers import directory_identifier
from swh.loader.git.from_disk import GitLoaderFromArchive
+from swh.search import get_search
from swh.storage.algos.dir_iterators import dir_iterator
from swh.web import config
from swh.web.browse.utils import (
@@ -244,6 +245,11 @@
# Create indexer storage instance that will be shared by indexers
idx_storage = get_indexer_storage('memory', {})
+ # Create search instance
+ search = get_search('memory', {})
+ search.initialize()
+ search.origin_update({'url': origin['url']}for origin in _TEST_ORIGINS)
+
# Add the empty directory to the test archive
empty_dir_id = directory_identifier({'entries': []})
empty_dir_id_bin = hash_to_bytes(empty_dir_id)
@@ -253,6 +259,7 @@
return {
'storage': storage,
'idx_storage': idx_storage,
+ 'search': search,
'origins': _TEST_ORIGINS,
'contents': contents,
'directories': list(directories),
@@ -311,17 +318,21 @@
return _current_tests_data
-def override_storages(storage, idx_storage):
+def override_storages(storage, idx_storage, search):
"""
Helper function to replace the storages from which archive data
are fetched.
"""
swh_config = config.get_config()
- swh_config.update({'storage': storage})
- service.storage = storage
+ swh_config.update({
+ 'storage': storage,
+ 'indexer_storage': idx_storage,
+ 'search': search,
+ })
- swh_config.update({'indexer_storage': idx_storage})
+ service.storage = storage
service.idx_storage = idx_storage
+ service.search = search
# Implement some special endpoints used to provide input tests data
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jan 30, 2:49 PM (1 w, 11 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227852
Attached To
D2196: [PoC] Use swh-search instead of swh-storage for searching origins.
Event Timeline
Log In to Comment