Page Menu | Home | Software Heritage

D474.id.diff
No One | Temporary

D474.id.diff

diff --git a/morane/crossminer_launch.py b/morane/crossminer_launch.py
--- a/morane/crossminer_launch.py
+++ b/morane/crossminer_launch.py
@@ -13,13 +13,13 @@
def __init__(self, db_conn_string='service=mirror-swh'):
self.conn = psycopg2.connect(db_conn_string)
- def execute_query(self, query):
+ def execute_query(self, query, arguments):
"""Connect to swh archive to execute query
"""
try:
cursor = self.conn.cursor()
- cursor.execute(query)
+ cursor.execute(query, arguments)
records = cursor.fetchall()
cursor.close()
return records
@@ -43,14 +43,14 @@
writer.writerow(row)
-def origin_scan_query(min_batch, max_batch, file_name):
+def origin_scan_query(db, min_batch, max_batch, file_name):
"""Retrieve origins between range [min_batch, max_batch[ whose last
visit resulted in a revision targetting a directory holding a
filename matching the pattern `filename`.
"""
limit = max_batch - min_batch
- return """
+ query = """
WITH last_visited AS (
SELECT o.url url, ov.snapshot_id snp, date
FROM origin o
@@ -74,7 +74,9 @@
INNER JOIN revision rev on hbr.revision_sha1 = rev.id
INNER JOIN directory dir on rev.directory = dir.id
INNER JOIN directory_entry_file def on def.id = any(dir.file_entries)
- WHERE def.name='%s'""" % (min_batch, max_batch, limit, file_name)
+ WHERE def.name = %s"""
+
+ return db.execute_query(query, (min_batch, max_batch, limit, file_name))
@click.command()
@@ -96,8 +98,7 @@
min_batch = start_from
max_batch = min_batch + block_size
while True:
- query = origin_scan_query(min_batch, max_batch, pattern_filename)
- records = db.execute_query(query)
+ records = origin_scan_query(db, min_batch, max_batch, pattern_filename)
if not records:
break
name = "%s_%s_origin.csv" % (min_batch, max_batch)

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 3:00 PM (11 w, 15 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3222694

Event Timeline