Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/db.py
Show First 20 Lines • Show All 486 Lines • ▼ Show 20 Lines | def origin_visit_get_latest( | ||||
query = '\n'.join(query_parts) | query = '\n'.join(query_parts) | ||||
cur.execute(query, (origin_id,)) | cur.execute(query, (origin_id,)) | ||||
r = cur.fetchone() | r = cur.fetchone() | ||||
if not r: | if not r: | ||||
return None | return None | ||||
return r | return r | ||||
def origin_visit_get_random(self, type, cur=None):
    """Randomly select one 'full' origin visit from the last 3 months.

    Any visit with status 'full', the requested type and a date within
    the last 3 months is a candidate; the query does not restrict the
    choice to the latest visit of each origin.

    Args:
        type: visit type, compared against ``origin_visit.type``.
        cur: optional cursor, handed to ``self._cursor``.

    Returns:
        The row returned by ``cur.fetchone()``, with the columns listed in
        ``self.origin_visit_select_cols``, or None when no visit matches
        the criteria.

    """
    cur = self._cursor(cur)
    columns = ','.join(self.origin_visit_select_cols)
    candidates = f"""with visits as (
                       select *
                       from origin_visit
                       where origin_visit.status='full' and
                             origin_visit.type=%s and
                             origin_visit.date > now() - '3 months'::interval
                     )
                     select {columns}
                     from visits as origin_visit
                     inner join origin
                       on origin_visit.origin=origin.id
                  """
    # Fast path: cheap sampling that stops at the first row passing the
    # random filter.
    sampled = candidates + """where random() < 0.1
                     limit 1
                  """
    cur.execute(sampled, (type, ))
    row = cur.fetchone()
    if row is None:
        # The random filter may have rejected every candidate (likely when
        # there are only a few of them). Retry with an unfiltered random
        # pick so that None is only returned when nothing matches at all.
        # Sorting is acceptable here: this path is only reached when the
        # sampled query found nothing.
        fallback = candidates + """order by random()
                     limit 1
                  """
        cur.execute(fallback, (type, ))
        row = cur.fetchone()
    return row
@staticmethod | @staticmethod | ||||
def mangle_query_key(key, main_table): | def mangle_query_key(key, main_table): | ||||
if key == 'id': | if key == 'id': | ||||
return 't.id' | return 't.id' | ||||
if key == 'parents': | if key == 'parents': | ||||
return ''' | return ''' | ||||
ARRAY( | ARRAY( | ||||
SELECT rh.parent_id::bytea | SELECT rh.parent_id::bytea | ||||
▲ Show 20 Lines • Show All 140 Lines • ▼ Show 20 Lines | def origin_get_by_sha1(self, sha1s, cur=None): | ||||
query = """SELECT %s FROM (VALUES %%s) as t(sha1) | query = """SELECT %s FROM (VALUES %%s) as t(sha1) | ||||
LEFT JOIN origin ON t.sha1 = digest(origin.url, 'sha1') | LEFT JOIN origin ON t.sha1 = digest(origin.url, 'sha1') | ||||
""" % ','.join('origin.' + col for col in self.origin_cols) | """ % ','.join('origin.' + col for col in self.origin_cols) | ||||
yield from execute_values_generator( | yield from execute_values_generator( | ||||
cur, query, ((sha1,) for sha1 in sha1s)) | cur, query, ((sha1,) for sha1 in sha1s)) | ||||
def origin_visit_get_random(self, type, cur=None):
    """Randomly select one 'full' origin visit from the last 3 months.

    Any visit with status 'full', the requested type and a date within
    the last 3 months is a candidate; the query does not restrict the
    choice to the latest visit of each origin.

    Args:
        type: visit type, compared against ``origin_visit.type``.
        cur: optional cursor, handed to ``self._cursor``.

    Returns:
        The row returned by ``cur.fetchone()``, with the columns listed in
        ``self.origin_visit_select_cols``, or None when no visit matches
        the criteria.

    """
    cur = self._cursor(cur)
    columns = ','.join(self.origin_visit_select_cols)
    candidates = f"""with visits as (
                       select *
                       from origin_visit
                       where origin_visit.status='full' and
                             origin_visit.type=%s and
                             origin_visit.date > now() - '3 months'::interval
                     )
                     select {columns}
                     from visits as origin_visit
                     inner join origin
                       on origin_visit.origin=origin.id
                  """
    # Fast path: cheap sampling that stops at the first row passing the
    # random filter.
    sampled = candidates + """where random() < 0.1
                     limit 1
                  """
    cur.execute(sampled, (type, ))
    row = cur.fetchone()
    if row is None:
        # The random filter may have rejected every candidate (likely when
        # there are only a few of them). Retry with an unfiltered random
        # pick so that None is only returned when nothing matches at all.
        # Sorting is acceptable here: this path is only reached when the
        # sampled query found nothing.
        fallback = candidates + """order by random()
                     limit 1
                  """
        cur.execute(fallback, (type, ))
        row = cur.fetchone()
    return row
def origin_id_get_by_url(self, origins, cur=None): | def origin_id_get_by_url(self, origins, cur=None): | ||||
"""Retrieve origin `(type, url)` from urls if found.""" | """Retrieve origin `(type, url)` from urls if found.""" | ||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
query = """SELECT id FROM (VALUES %s) as t(url) | query = """SELECT id FROM (VALUES %s) as t(url) | ||||
LEFT JOIN origin ON t.url = origin.url | LEFT JOIN origin ON t.url = origin.url | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 235 Lines • Show Last 20 Lines |