diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -492,6 +492,30 @@ return None return r + def origin_visit_get_random(self, type, cur=None): + """Randomly select one visit of the given type whose status is 'full' + and whose date is within the last 3 months + + """ + cur = self._cursor(cur) + columns = ','.join(self.origin_visit_select_cols) + query = f"""with visits as ( + select * + from origin_visit + where origin_visit.status='full' and + origin_visit.type=%s and + origin_visit.date > now() - '3 months'::interval + ) + select {columns} + from visits as origin_visit + inner join origin + on origin_visit.origin=origin.id + where random() < 0.1 + limit 1 + """ + cur.execute(query, (type, )) + return cur.fetchone() + @staticmethod def mangle_query_key(key, main_table): if key == 'id': @@ -648,30 +672,6 @@ yield from execute_values_generator( cur, query, ((sha1,) for sha1 in sha1s)) - def origin_visit_get_random(self, type, cur=None): - """Randomly select one origin whose last visit was full in the last 3 - months - - """ - cur = self._cursor(cur) - columns = ','.join(self.origin_visit_select_cols) - query = f"""with visits as ( - select * - from origin_visit - where origin_visit.status='full' and - origin_visit.type=%s and - origin_visit.date > now() - '3 months'::interval - ) - select {columns} - from visits as origin_visit - inner join origin - on origin_visit.origin=origin.id - where random() < 0.1 - limit 1 - """ - cur.execute(query, (type, )) - return cur.fetchone() - def origin_id_get_by_url(self, origins, cur=None): """Retrieve origin `(type, url)` from urls if found.""" cur = self._cursor(cur) diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -1496,6 +1496,24 @@ if origin_visit: return dict(zip(db.origin_visit_get_cols, origin_visit)) + @remote_api_endpoint('origin/visit/get_random') + @timed + @db_transaction() + def origin_visit_get_random( + self, type, db=None, 
cur=None) -> Mapping[str, Any]: + """Randomly select one origin visit of the given type from the archive + + Returns: + dict representing an origin visit, in the same format as + `origin_visit_get`, or an empty dict if no visit was selected. + + """ + data: Dict[str, Any] = {} + result = db.origin_visit_get_random(type, cur) + if result: + data = dict(zip(db.origin_visit_get_cols, result)) + return data + @remote_api_endpoint('object/find_by_sha1_git') @timed @db_transaction(statement_timeout=2000) @@ -1595,23 +1613,6 @@ else: yield None - @remote_api_endpoint('origin/visit/get_random') - @timed - @db_transaction() - def origin_visit_get_random( - self, type, db=None, cur=None) -> Mapping[str, Any]: - """Randomly select one origin from the archive - - Returns: - origin dict selected randomly on the dataset if found - - """ - data: Dict[str, Any] = {} - result = db.origin_visit_get_random(type, cur) - if result: - data = dict(zip(db.origin_visit_get_cols, result)) - return data - @remote_api_endpoint('origin/get_range') @timed @db_transaction_generator()