Page MenuHomeSoftware Heritage

D2445.id8649.diff
No OneTemporary

D2445.id8649.diff

diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -492,6 +492,30 @@
return None
return r
+ def origin_visit_get_random(self, type, cur=None):
+ """Randomly select one origin visit of the given type that was full and
+ dated within the last 3 months
+
+ """
+ cur = self._cursor(cur)
+ columns = ','.join(self.origin_visit_select_cols)
+ query = f"""with visits as (
+ select *
+ from origin_visit
+ where origin_visit.status='full' and
+ origin_visit.type=%s and
+ origin_visit.date > now() - '3 months'::interval
+ )
+ select {columns}
+ from visits as origin_visit
+ inner join origin
+ on origin_visit.origin=origin.id
+ where random() < 0.1
+ limit 1
+ """
+ cur.execute(query, (type, ))
+ return cur.fetchone()
+
@staticmethod
def mangle_query_key(key, main_table):
if key == 'id':
@@ -648,30 +672,6 @@
yield from execute_values_generator(
cur, query, ((sha1,) for sha1 in sha1s))
- def origin_visit_get_random(self, type, cur=None):
- """Randomly select one origin whose last visit was full in the last 3
- months
-
- """
- cur = self._cursor(cur)
- columns = ','.join(self.origin_visit_select_cols)
- query = f"""with visits as (
- select *
- from origin_visit
- where origin_visit.status='full' and
- origin_visit.type=%s and
- origin_visit.date > now() - '3 months'::interval
- )
- select {columns}
- from visits as origin_visit
- inner join origin
- on origin_visit.origin=origin.id
- where random() < 0.1
- limit 1
- """
- cur.execute(query, (type, ))
- return cur.fetchone()
-
def origin_id_get_by_url(self, origins, cur=None):
"""Retrieve origin `(type, url)` from urls if found."""
cur = self._cursor(cur)
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -1496,6 +1496,24 @@
if origin_visit:
return dict(zip(db.origin_visit_get_cols, origin_visit))
+ @remote_api_endpoint('origin/visit/get_random')
+ @timed
+ @db_transaction()
+ def origin_visit_get_random(
+ self, type, db=None, cur=None) -> Mapping[str, Any]:
+ """Randomly select one origin visit from the archive
+
+ Returns:
+ dict representing an origin visit, in the same format as
+ `origin_visit_get`.
+
+ """
+ data: Dict[str, Any] = {}
+ result = db.origin_visit_get_random(type, cur)
+ if result:
+ data = dict(zip(db.origin_visit_get_cols, result))
+ return data
+
@remote_api_endpoint('object/find_by_sha1_git')
@timed
@db_transaction(statement_timeout=2000)
@@ -1595,23 +1613,6 @@
else:
yield None
- @remote_api_endpoint('origin/visit/get_random')
- @timed
- @db_transaction()
- def origin_visit_get_random(
- self, type, db=None, cur=None) -> Mapping[str, Any]:
- """Randomly select one origin from the archive
-
- Returns:
- origin dict selected randomly on the dataset if found
-
- """
- data: Dict[str, Any] = {}
- result = db.origin_visit_get_random(type, cur)
- if result:
- data = dict(zip(db.origin_visit_get_cols, result))
- return data
-
@remote_api_endpoint('origin/get_range')
@timed
@db_transaction_generator()

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:39 PM (1 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3232915

Event Timeline