Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9696858
D2192.id7475.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D2192.id7475.diff
View Options
diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -20,7 +20,7 @@
res = {
'url': origin.pop('url')
}
- for field_name in ('intrinsic_metadata',):
+ for field_name in ('intrinsic_metadata', 'has_visits'):
if field_name in origin:
res[field_name] = origin.pop(field_name)
return res
@@ -63,6 +63,9 @@
}
}
},
+ 'has_visits': {
+ 'type': 'boolean',
+ },
'intrinsic_metadata': {
'type': 'nested',
'properties': {
@@ -107,14 +110,16 @@
def origin_search(
self, *,
url_pattern: str = None, metadata_pattern: str = None,
+ with_visit: bool = False,
scroll_token: str = None, count: int = 50
) -> Dict[str, object]:
"""Searches for origins matching the `url_pattern`.
Args:
url_pattern (str): Part of the URL to search for
- scroll_token (str): `scroll_token` is an opaque value used for
- pagination.
+ with_visit (bool): Whether origins with no visit are to be
+ filtered out
+ scroll_token (str): Opaque value used for pagination.
count (int): number of results to return.
Returns:
@@ -159,10 +164,17 @@
'At least one of url_pattern and metadata_pattern '
'must be provided.')
+ if with_visit:
+ query_clauses.append({
+ 'term': {
+ 'has_visits': True,
+ }
+ })
+
body = {
'query': {
'bool': {
- 'should': query_clauses, # TODO: must?
+ 'must': query_clauses,
}
},
'size': count,
diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py
--- a/swh/search/in_memory.py
+++ b/swh/search/in_memory.py
@@ -61,6 +61,7 @@
def origin_search(
self, *,
url_pattern: str = None, metadata_pattern: str = None,
+ with_visit: bool = False,
scroll_token: str = None, count: int = 50
) -> Dict[str, object]:
matches = (self._origins[id_] for id_ in self._origin_ids)
@@ -91,6 +92,9 @@
'At least one of url_pattern and metadata_pattern '
'must be provided.')
+ if with_visit:
+ matches = filter(lambda o: o.get('has_visits'), matches)
+
if scroll_token:
scroll_token = msgpack.loads(base64.b64decode(scroll_token))
start_at_index = scroll_token[b'start_at_index']
diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
--- a/swh/search/tests/test_search.py
+++ b/swh/search/tests/test_search.py
@@ -54,6 +54,34 @@
expected_results = ['http://barbaz.qux', 'http://qux.quux']
assert sorted(results) == sorted(expected_results)
+ def test_origin_with_visit(self):
+ self.search.origin_update([
+ {'url': 'http://foobar.baz', 'has_visits': True},
+ ])
+
+ results = self.search.origin_search(
+ url_pattern='foobar', with_visit=True)
+ assert results == {'scroll_token': None, 'results': [
+ {'url': 'http://foobar.baz'}]}
+
+ def test_origin_with_visit_added(self):
+ self.search.origin_update([
+ {'url': 'http://foobar.baz'},
+ ])
+
+ results = self.search.origin_search(
+ url_pattern='foobar', with_visit=True)
+ assert results == {'scroll_token': None, 'results': []}
+
+ self.search.origin_update([
+ {'url': 'http://foobar.baz', 'has_visits': True},
+ ])
+
+ results = self.search.origin_search(
+ url_pattern='foobar', with_visit=True)
+ assert results == {'scroll_token': None, 'results': [
+ {'url': 'http://foobar.baz'}]}
+
def test_origin_intrinsic_metadata_description(self):
self.search.origin_update([
{
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Aug 17, 9:51 PM (13 h, 8 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218895
Attached To
D2192: Add 'with_visit' filter to origin search.
Event Timeline
Log In to Comment