Page MenuHomeSoftware Heritage

D2192.id7475.diff
No OneTemporary

D2192.id7475.diff

diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -20,7 +20,7 @@
res = {
'url': origin.pop('url')
}
- for field_name in ('intrinsic_metadata',):
+ for field_name in ('intrinsic_metadata', 'has_visits'):
if field_name in origin:
res[field_name] = origin.pop(field_name)
return res
@@ -63,6 +63,9 @@
}
}
},
+ 'has_visits': {
+ 'type': 'boolean',
+ },
'intrinsic_metadata': {
'type': 'nested',
'properties': {
@@ -107,14 +110,16 @@
def origin_search(
self, *,
url_pattern: str = None, metadata_pattern: str = None,
+ with_visit: bool = False,
scroll_token: str = None, count: int = 50
) -> Dict[str, object]:
"""Searches for origins matching the `url_pattern`.
Args:
url_pattern (str): Part of the URL to search for
- scroll_token (str): `scroll_token` is an opaque value used for
- pagination.
+ with_visit (bool): Whether origins with no visit are to be
+ filtered out
+ scroll_token (str): Opaque value used for pagination.
count (int): number of results to return.
Returns:
@@ -159,10 +164,17 @@
'At least one of url_pattern and metadata_pattern '
'must be provided.')
+ if with_visit:
+ query_clauses.append({
+ 'term': {
+ 'has_visits': True,
+ }
+ })
+
body = {
'query': {
'bool': {
- 'should': query_clauses, # TODO: must?
+ 'must': query_clauses,
}
},
'size': count,
diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py
--- a/swh/search/in_memory.py
+++ b/swh/search/in_memory.py
@@ -61,6 +61,7 @@
def origin_search(
self, *,
url_pattern: str = None, metadata_pattern: str = None,
+ with_visit: bool = False,
scroll_token: str = None, count: int = 50
) -> Dict[str, object]:
matches = (self._origins[id_] for id_ in self._origin_ids)
@@ -91,6 +92,9 @@
'At least one of url_pattern and metadata_pattern '
'must be provided.')
+ if with_visit:
+ matches = filter(lambda o: o.get('has_visits'), matches)
+
if scroll_token:
scroll_token = msgpack.loads(base64.b64decode(scroll_token))
start_at_index = scroll_token[b'start_at_index']
diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
--- a/swh/search/tests/test_search.py
+++ b/swh/search/tests/test_search.py
@@ -54,6 +54,34 @@
expected_results = ['http://barbaz.qux', 'http://qux.quux']
assert sorted(results) == sorted(expected_results)
+ def test_origin_with_visit(self):
+ self.search.origin_update([
+ {'url': 'http://foobar.baz', 'has_visits': True},
+ ])
+
+ results = self.search.origin_search(
+ url_pattern='foobar', with_visit=True)
+ assert results == {'scroll_token': None, 'results': [
+ {'url': 'http://foobar.baz'}]}
+
+ def test_origin_with_visit_added(self):
+ self.search.origin_update([
+ {'url': 'http://foobar.baz'},
+ ])
+
+ results = self.search.origin_search(
+ url_pattern='foobar', with_visit=True)
+ assert results == {'scroll_token': None, 'results': []}
+
+ self.search.origin_update([
+ {'url': 'http://foobar.baz', 'has_visits': True},
+ ])
+
+ results = self.search.origin_search(
+ url_pattern='foobar', with_visit=True)
+ assert results == {'scroll_token': None, 'results': [
+ {'url': 'http://foobar.baz'}]}
+
def test_origin_intrinsic_metadata_description(self):
self.search.origin_update([
{

File Metadata

Mime Type
text/plain
Expires
Sun, Aug 17, 9:51 PM (13 h, 8 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218895

Event Timeline