Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/postgresql/provenancedb_with_path.py
Show First 20 Lines • Show All 144 Lines • ▼ Show 20 Lines | def insert_location(self, src0_table, src1_table, dst_table): | ||||
# TODO: find a better way of doing this; might be doable in a coupls of | # TODO: find a better way of doing this; might be doable in a coupls of | ||||
# SQL queries (one to insert missing entries in the location' table, | # SQL queries (one to insert missing entries in the location' table, | ||||
# one to insert entries in the dst_table) | # one to insert entries in the dst_table) | ||||
# Resolve src0 ids | # Resolve src0 ids | ||||
src0_sha1s = tuple(set(sha1 for (sha1, _, _) in self.insert_cache[dst_table])) | src0_sha1s = tuple(set(sha1 for (sha1, _, _) in self.insert_cache[dst_table])) | ||||
fmt = ",".join(["%s"] * len(src0_sha1s)) | fmt = ",".join(["%s"] * len(src0_sha1s)) | ||||
self.cursor.execute( | self.cursor.execute( | ||||
f"""SELECT sha1, id FROM {src0_table} WHERE sha1 IN ({fmt})""", src0_sha1s, | f"""SELECT sha1, id FROM {src0_table} WHERE sha1 IN ({fmt})""", | ||||
src0_sha1s, | |||||
) | ) | ||||
src0_values = dict(self.cursor.fetchall()) | src0_values = dict(self.cursor.fetchall()) | ||||
# Resolve src1 ids | # Resolve src1 ids | ||||
src1_sha1s = tuple(set(sha1 for (_, sha1, _) in self.insert_cache[dst_table])) | src1_sha1s = tuple(set(sha1 for (_, sha1, _) in self.insert_cache[dst_table])) | ||||
fmt = ",".join(["%s"] * len(src1_sha1s)) | fmt = ",".join(["%s"] * len(src1_sha1s)) | ||||
self.cursor.execute( | self.cursor.execute( | ||||
f"""SELECT sha1, id FROM {src1_table} WHERE sha1 IN ({fmt})""", src1_sha1s, | f"""SELECT sha1, id FROM {src1_table} WHERE sha1 IN ({fmt})""", | ||||
src1_sha1s, | |||||
) | ) | ||||
src1_values = dict(self.cursor.fetchall()) | src1_values = dict(self.cursor.fetchall()) | ||||
# insert missing locations | # insert missing locations | ||||
locations = tuple(set((loc,) for (_, _, loc) in self.insert_cache[dst_table])) | locations = tuple(set((loc,) for (_, _, loc) in self.insert_cache[dst_table])) | ||||
psycopg2.extras.execute_values( | psycopg2.extras.execute_values( | ||||
self.cursor, | self.cursor, | ||||
""" | """ | ||||
INSERT INTO location(path) VALUES %s | INSERT INTO location(path) VALUES %s | ||||
ON CONFLICT (path) DO NOTHING | ON CONFLICT (path) DO NOTHING | ||||
""", | """, | ||||
locations, | locations, | ||||
) | ) | ||||
# fetch location ids | # fetch location ids | ||||
fmt = ",".join(["%s"] * len(locations)) | fmt = ",".join(["%s"] * len(locations)) | ||||
self.cursor.execute( | self.cursor.execute( | ||||
f"SELECT path, id FROM location WHERE path IN ({fmt})", locations, | f"SELECT path, id FROM location WHERE path IN ({fmt})", | ||||
locations, | |||||
) | ) | ||||
loc_ids = dict(self.cursor.fetchall()) | loc_ids = dict(self.cursor.fetchall()) | ||||
# Insert values in dst_table | # Insert values in dst_table | ||||
rows = [ | rows = [ | ||||
(src0_values[sha1_src], src1_values[sha1_dst], loc_ids[loc]) | (src0_values[sha1_src], src1_values[sha1_dst], loc_ids[loc]) | ||||
for (sha1_src, sha1_dst, loc) in self.insert_cache[dst_table] | for (sha1_src, sha1_dst, loc) in self.insert_cache[dst_table] | ||||
] | ] | ||||
psycopg2.extras.execute_values( | psycopg2.extras.execute_values( | ||||
self.cursor, | self.cursor, | ||||
f"""INSERT INTO {dst_table} VALUES %s | f"""INSERT INTO {dst_table} VALUES %s | ||||
ON CONFLICT DO NOTHING""", | ON CONFLICT DO NOTHING""", | ||||
rows, | rows, | ||||
) | ) | ||||
self.insert_cache[dst_table].clear() | self.insert_cache[dst_table].clear() |