Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/db.py
Show First 20 Lines • Show All 857 Lines • ▼ Show 20 Lines | def origin_get(self, id, cur=None): | ||||
cur.execute(query, (id,)) | cur.execute(query, (id,)) | ||||
data = cur.fetchone() | data = cur.fetchone() | ||||
if data: | if data: | ||||
return line_to_bytes(data) | return line_to_bytes(data) | ||||
return None | return None | ||||
def origin_search(self, url_pattern, offset=0, limit=50, | def origin_search(self, url_pattern, offset=0, limit=50, | ||||
regexp=False, cur=None): | regexp=False, with_visit=False, cur=None): | ||||
"""Search for origins whose urls contain a provided string pattern | """Search for origins whose urls contain a provided string pattern | ||||
or match a provided regular expression. | or match a provided regular expression. | ||||
The search is performed in a case insensitive way. | The search is performed in a case insensitive way. | ||||
Args: | Args: | ||||
url_pattern: the string pattern to search for in origin urls | url_pattern (str): the string pattern to search for in origin urls | ||||
offset: number of found origins to skip before returning results | offset (int): number of found origins to skip before returning | ||||
limit: the maximum number of found origins to return | results | ||||
regexp: if True, consider the provided pattern as a regular | limit (int): the maximum number of found origins to return | ||||
regexp (bool): if True, consider the provided pattern as a regular | |||||
expression and returns origins whose urls match it | expression and returns origins whose urls match it | ||||
with_visit (bool): if True, filter out origins with no visit | |||||
""" | """ | ||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
origin_cols = ','.join(self.origin_cols) | origin_cols = ','.join(self.origin_cols) | ||||
query = """SELECT %s | query = """SELECT %s | ||||
FROM origin WHERE url %s %%s | FROM origin | ||||
WHERE """ | |||||
if with_visit: | |||||
query += """ | |||||
EXISTS (SELECT 1 from origin_visit WHERE origin=origin.id) | |||||
AND """ | |||||
query += """ | |||||
url %s %%s | |||||
ORDER BY id | ORDER BY id | ||||
OFFSET %%s LIMIT %%s""" | OFFSET %%s LIMIT %%s""" | ||||
if not regexp: | if not regexp: | ||||
query = query % (origin_cols, 'ILIKE') | query = query % (origin_cols, 'ILIKE') | ||||
query_params = ('%'+url_pattern+'%', offset, limit) | query_params = ('%'+url_pattern+'%', offset, limit) | ||||
else: | else: | ||||
query = query % (origin_cols, '~*') | query = query % (origin_cols, '~*') | ||||
▲ Show 20 Lines • Show All 257 Lines • Show Last 20 Lines |