Page Menu | Home | Software Heritage

D403.diff
No One | Temporary

D403.diff

diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py
--- a/swh/storage/api/client.py
+++ b/swh/storage/api/client.py
@@ -123,11 +123,13 @@
def origin_get(self, origin):
return self.post('origin/get', {'origin': origin})
- def origin_search(self, url_pattern, offset=0, limit=50, regexp=False):
+ def origin_search(self, url_pattern, offset=0, limit=50, regexp=False,
+ with_visit=False):
return self.post('origin/search', {'url_pattern': url_pattern,
'offset': offset,
'limit': limit,
- 'regexp': regexp})
+ 'regexp': regexp,
+ 'with_visit': with_visit})
def origin_add(self, origins):
return self.post('origin/add_multi', {'origins': origins})
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -863,23 +863,32 @@
return None
def origin_search(self, url_pattern, offset=0, limit=50,
- regexp=False, cur=None):
+ regexp=False, with_visit=False, cur=None):
"""Search for origins whose urls contain a provided string pattern
or match a provided regular expression.
The search is performed in a case insensitive way.
Args:
- url_pattern: the string pattern to search for in origin urls
- offset: number of found origins to skip before returning results
- limit: the maximum number of found origins to return
- regexp: if True, consider the provided pattern as a regular
+ url_pattern (str): the string pattern to search for in origin urls
+ offset (int): number of found origins to skip before returning
+ results
+ limit (int): the maximum number of found origins to return
+ regexp (bool): if True, consider the provided pattern as a regular
expression and returns origins whose urls match it
+ with_visit (bool): if True, filter out origins with no visit
"""
cur = self._cursor(cur)
origin_cols = ','.join(self.origin_cols)
query = """SELECT %s
- FROM origin WHERE url %s %%s
+ FROM origin
+ WHERE """
+ if with_visit:
+ query += """
+ EXISTS (SELECT 1 from origin_visit WHERE origin=origin.id)
+ AND """
+ query += """
+ url %s %%s
ORDER BY id
OFFSET %%s LIMIT %%s"""
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -1128,24 +1128,26 @@
@db_transaction_generator()
def origin_search(self, url_pattern, offset=0, limit=50,
- regexp=False, db=None, cur=None):
+ regexp=False, with_visit=False, db=None, cur=None):
"""Search for origins whose urls contain a provided string pattern
or match a provided regular expression.
The search is performed in a case insensitive way.
Args:
- url_pattern: the string pattern to search for in origin urls
- offset: number of found origins to skip before returning results
- limit: the maximum number of found origins to return
- regexp: if True, consider the provided pattern as a regular
+ url_pattern (str): the string pattern to search for in origin urls
+ offset (int): number of found origins to skip before returning
+ results
+ limit (int): the maximum number of found origins to return
+ regexp (bool): if True, consider the provided pattern as a regular
expression and return origins whose urls match it
+ with_visit (bool): if True, filter out origins with no visit
Returns:
An iterable of dict containing origin information as returned
by :meth:`swh.storage.storage.Storage.origin_get`.
"""
for origin in db.origin_search(url_pattern, offset, limit,
- regexp, cur):
+ regexp, with_visit, cur):
yield dict(zip(self.origin_keys, origin))
@db_transaction()

File Metadata

Mime Type
text/plain
Expires
Sun, Aug 17, 9:51 PM (8 h, 25 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3215939

Event Timeline