Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9749538
D5842.id21022.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D5842.id21022.diff
View Options
diff --git a/swh/provenance/postgresql/provenancedb_with_path.py b/swh/provenance/postgresql/provenancedb_with_path.py
--- a/swh/provenance/postgresql/provenancedb_with_path.py
+++ b/swh/provenance/postgresql/provenancedb_with_path.py
@@ -74,28 +74,6 @@
Also insert missing location entries in the 'location' table.
"""
if data:
- # TODO: find a better way of doing this; might be doable in a couple of
- # SQL queries (one to insert missing entries in the location' table,
- # one to insert entries in the relation)
-
- # Resolve src ids
- src_sha1s = tuple(set(sha1 for (sha1, _, _) in data))
- fmt = ",".join(["%s"] * len(src_sha1s))
- self.cursor.execute(
- f"""SELECT sha1, id FROM {src} WHERE sha1 IN ({fmt})""",
- src_sha1s,
- )
- src_values = dict(self.cursor.fetchall())
-
- # Resolve dst ids
- dst_sha1s = tuple(set(sha1 for (_, sha1, _) in data))
- fmt = ",".join(["%s"] * len(dst_sha1s))
- self.cursor.execute(
- f"""SELECT sha1, id FROM {dst} WHERE sha1 IN ({fmt})""",
- dst_sha1s,
- )
- dst_values = dict(self.cursor.fetchall())
-
# insert missing locations
locations = tuple(set((loc,) for (_, _, loc) in data))
psycopg2.extras.execute_values(
@@ -107,26 +85,14 @@
""",
locations,
)
- # fetch location ids
- fmt = ",".join(["%s"] * len(locations))
- self.cursor.execute(
- f"SELECT path, id FROM location WHERE path IN ({fmt})",
- locations,
- )
- loc_ids = dict(self.cursor.fetchall())
-
- # Insert values in relation
- rows = [
- (src_values[sha1_src], dst_values[sha1_dst], loc_ids[loc])
- for (sha1_src, sha1_dst, loc) in data
- ]
- psycopg2.extras.execute_values(
- self.cursor,
- f"""
- LOCK TABLE ONLY {relation};
- INSERT INTO {relation} VALUES %s
- ON CONFLICT DO NOTHING
- """,
- rows,
- )
+ sql = f"""
+ LOCK TABLE ONLY {relation};
+ INSERT INTO {relation}
+ SELECT {src}.id, {dst}.id, location.id
+ FROM (VALUES %s) AS V(src, dst, path)
+ INNER JOIN {src} on ({src}.sha1=V.src)
+ INNER JOIN {dst} on ({dst}.sha1=V.dst)
+ INNER JOIN location on (location.path=V.path)
+ """
+ psycopg2.extras.execute_values(self.cursor, sql, data)
data.clear()
diff --git a/swh/provenance/postgresql/provenancedb_without_path.py b/swh/provenance/postgresql/provenancedb_without_path.py
--- a/swh/provenance/postgresql/provenancedb_without_path.py
+++ b/swh/provenance/postgresql/provenancedb_without_path.py
@@ -1,6 +1,4 @@
from datetime import datetime
-import itertools
-import operator
from typing import Generator, Optional, Set, Tuple
import psycopg2
@@ -87,36 +85,13 @@
self, src: str, dst: str, relation: str, data: Set[Tuple[bytes, bytes, bytes]]
):
if data:
- # Resolve src ids
- src_values = dict().fromkeys(map(operator.itemgetter(0), data))
- values = ", ".join(itertools.repeat("%s", len(src_values)))
- self.cursor.execute(
- f"""SELECT sha1, id FROM {src} WHERE sha1 IN ({values})""",
- tuple(src_values),
- )
- src_values = dict(self.cursor.fetchall())
-
- # Resolve dst ids
- dst_values = dict().fromkeys(map(operator.itemgetter(1), data))
- values = ", ".join(itertools.repeat("%s", len(dst_values)))
- self.cursor.execute(
- f"""SELECT sha1, id FROM {dst} WHERE sha1 IN ({values})""",
- tuple(dst_values),
- )
- dst_values = dict(self.cursor.fetchall())
-
- # Insert values in relation
- rows = map(
- lambda row: (src_values[row[0]], dst_values[row[1]]),
- data,
- )
- psycopg2.extras.execute_values(
- self.cursor,
- f"""
- LOCK TABLE ONLY {relation};
- INSERT INTO {relation} VALUES %s
- ON CONFLICT DO NOTHING
- """,
- rows,
- )
+ sql = f"""
+ LOCK TABLE ONLY {relation};
+ INSERT INTO {relation}
+ SELECT {src}.id, {dst}.id
+ FROM (VALUES %s) AS V(src, dst)
+ INNER JOIN {src} on ({src}.sha1=V.src)
+ INNER JOIN {dst} on ({dst}.sha1=V.dst)
+ """
+ psycopg2.extras.execute_values(self.cursor, sql, data)
data.clear()
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Aug 24, 5:56 PM (4 d, 8 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225657
Attached To
D5842: Refactor the provenanceDB.insert_relation()
Event Timeline
Log In to Comment