Page MenuHomeSoftware Heritage

D5842.id21022.diff
No OneTemporary

D5842.id21022.diff

diff --git a/swh/provenance/postgresql/provenancedb_with_path.py b/swh/provenance/postgresql/provenancedb_with_path.py
--- a/swh/provenance/postgresql/provenancedb_with_path.py
+++ b/swh/provenance/postgresql/provenancedb_with_path.py
@@ -74,28 +74,6 @@
Also insert missing location entries in the 'location' table.
"""
if data:
- # TODO: find a better way of doing this; might be doable in a couple of
- # SQL queries (one to insert missing entries in the location' table,
- # one to insert entries in the relation)
-
- # Resolve src ids
- src_sha1s = tuple(set(sha1 for (sha1, _, _) in data))
- fmt = ",".join(["%s"] * len(src_sha1s))
- self.cursor.execute(
- f"""SELECT sha1, id FROM {src} WHERE sha1 IN ({fmt})""",
- src_sha1s,
- )
- src_values = dict(self.cursor.fetchall())
-
- # Resolve dst ids
- dst_sha1s = tuple(set(sha1 for (_, sha1, _) in data))
- fmt = ",".join(["%s"] * len(dst_sha1s))
- self.cursor.execute(
- f"""SELECT sha1, id FROM {dst} WHERE sha1 IN ({fmt})""",
- dst_sha1s,
- )
- dst_values = dict(self.cursor.fetchall())
-
# insert missing locations
locations = tuple(set((loc,) for (_, _, loc) in data))
psycopg2.extras.execute_values(
@@ -107,26 +85,14 @@
""",
locations,
)
- # fetch location ids
- fmt = ",".join(["%s"] * len(locations))
- self.cursor.execute(
- f"SELECT path, id FROM location WHERE path IN ({fmt})",
- locations,
- )
- loc_ids = dict(self.cursor.fetchall())
-
- # Insert values in relation
- rows = [
- (src_values[sha1_src], dst_values[sha1_dst], loc_ids[loc])
- for (sha1_src, sha1_dst, loc) in data
- ]
- psycopg2.extras.execute_values(
- self.cursor,
- f"""
- LOCK TABLE ONLY {relation};
- INSERT INTO {relation} VALUES %s
- ON CONFLICT DO NOTHING
- """,
- rows,
- )
+ sql = f"""
+ LOCK TABLE ONLY {relation};
+ INSERT INTO {relation}
+ SELECT {src}.id, {dst}.id, location.id
+ FROM (VALUES %s) AS V(src, dst, path)
+ INNER JOIN {src} on ({src}.sha1=V.src)
+ INNER JOIN {dst} on ({dst}.sha1=V.dst)
+ INNER JOIN location on (location.path=V.path)
+ """
+ psycopg2.extras.execute_values(self.cursor, sql, data)
data.clear()
diff --git a/swh/provenance/postgresql/provenancedb_without_path.py b/swh/provenance/postgresql/provenancedb_without_path.py
--- a/swh/provenance/postgresql/provenancedb_without_path.py
+++ b/swh/provenance/postgresql/provenancedb_without_path.py
@@ -1,6 +1,4 @@
from datetime import datetime
-import itertools
-import operator
from typing import Generator, Optional, Set, Tuple
import psycopg2
@@ -87,36 +85,13 @@
self, src: str, dst: str, relation: str, data: Set[Tuple[bytes, bytes, bytes]]
):
if data:
- # Resolve src ids
- src_values = dict().fromkeys(map(operator.itemgetter(0), data))
- values = ", ".join(itertools.repeat("%s", len(src_values)))
- self.cursor.execute(
- f"""SELECT sha1, id FROM {src} WHERE sha1 IN ({values})""",
- tuple(src_values),
- )
- src_values = dict(self.cursor.fetchall())
-
- # Resolve dst ids
- dst_values = dict().fromkeys(map(operator.itemgetter(1), data))
- values = ", ".join(itertools.repeat("%s", len(dst_values)))
- self.cursor.execute(
- f"""SELECT sha1, id FROM {dst} WHERE sha1 IN ({values})""",
- tuple(dst_values),
- )
- dst_values = dict(self.cursor.fetchall())
-
- # Insert values in relation
- rows = map(
- lambda row: (src_values[row[0]], dst_values[row[1]]),
- data,
- )
- psycopg2.extras.execute_values(
- self.cursor,
- f"""
- LOCK TABLE ONLY {relation};
- INSERT INTO {relation} VALUES %s
- ON CONFLICT DO NOTHING
- """,
- rows,
- )
+ sql = f"""
+ LOCK TABLE ONLY {relation};
+ INSERT INTO {relation}
+ SELECT {src}.id, {dst}.id
+ FROM (VALUES %s) AS V(src, dst)
+ INNER JOIN {src} on ({src}.sha1=V.src)
+ INNER JOIN {dst} on ({dst}.sha1=V.dst)
+ """
+ psycopg2.extras.execute_values(self.cursor, sql, data)
data.clear()

File Metadata

Mime Type
text/plain
Expires
Sun, Aug 24, 5:56 PM (4 d, 8 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225657

Event Timeline