diff --git a/swh/provenance/postgresql/provenancedb_base.py b/swh/provenance/postgresql/provenancedb_base.py --- a/swh/provenance/postgresql/provenancedb_base.py +++ b/swh/provenance/postgresql/provenancedb_base.py @@ -33,7 +33,6 @@ try: # First insert entities for entity in ("content", "directory", "revision"): - self.insert_entity( entity, { @@ -43,12 +42,12 @@ ) # Relations should come after ids for entities were resolved - for rel_table in ( + for relation in ( "content_in_revision", "content_in_directory", "directory_in_revision", ): - self.insert_relation(rel_table, data[rel_table]) + self.insert_relation(relation, data[relation]) # TODO: this should be updated when origin-revision layer gets properly # updated. @@ -102,8 +101,8 @@ f""" LOCK TABLE ONLY {entity}; INSERT INTO {entity}(sha1, date) VALUES %s - ON CONFLICT (sha1) DO - UPDATE SET date=LEAST(EXCLUDED.date,{entity}.date) + ON CONFLICT (sha1) DO + UPDATE SET date=LEAST(EXCLUDED.date,{entity}.date) """, data.items(), ) @@ -130,8 +129,8 @@ """ LOCK TABLE ONLY origin; INSERT INTO origin(url) VALUES (%s) - ON CONFLICT DO NOTHING - RETURNING id + ON CONFLICT DO NOTHING + RETURNING id """, (url,), ) diff --git a/swh/provenance/postgresql/provenancedb_with_path.py b/swh/provenance/postgresql/provenancedb_with_path.py --- a/swh/provenance/postgresql/provenancedb_with_path.py +++ b/swh/provenance/postgresql/provenancedb_with_path.py @@ -76,7 +76,6 @@ "content_in_directory", "directory_in_revision", ) - # insert missing locations src, dst = relation.split("_in_") # insert missing locations @@ -86,18 +85,21 @@ """ LOCK TABLE ONLY location; INSERT INTO location(path) VALUES %s - ON CONFLICT (path) DO NOTHING + ON CONFLICT (path) DO NOTHING """, locations, ) - sql = f""" - LOCK TABLE ONLY {relation}; - INSERT INTO {relation} - SELECT {src}.id, {dst}.id, location.id - FROM (VALUES %s) AS V(src, dst, path) - INNER JOIN {src} on ({src}.sha1=V.src) - INNER JOIN {dst} on ({dst}.sha1=V.dst) - INNER JOIN location on (location.path=V.path) - """ - psycopg2.extras.execute_values(self.cursor, sql, data) + psycopg2.extras.execute_values( + self.cursor, + f""" + LOCK TABLE ONLY {relation}; + INSERT INTO {relation} + SELECT {src}.id, {dst}.id, location.id + FROM (VALUES %s) AS V(src, dst, path) + INNER JOIN {src} on ({src}.sha1=V.src) + INNER JOIN {dst} on ({dst}.sha1=V.dst) + INNER JOIN location on (location.path=V.path) + """, + data, + ) data.clear() diff --git a/swh/provenance/postgresql/provenancedb_without_path.py b/swh/provenance/postgresql/provenancedb_without_path.py --- a/swh/provenance/postgresql/provenancedb_without_path.py +++ b/swh/provenance/postgresql/provenancedb_without_path.py @@ -68,16 +68,18 @@ "content_in_directory", "directory_in_revision", ) - # insert missing locations src, dst = relation.split("_in_") - sql = f""" - LOCK TABLE ONLY {relation}; - INSERT INTO {relation} - SELECT {src}.id, {dst}.id - FROM (VALUES %s) AS V(src, dst) - INNER JOIN {src} on ({src}.sha1=V.src) - INNER JOIN {dst} on ({dst}.sha1=V.dst) - """ - psycopg2.extras.execute_values(self.cursor, sql, data) + psycopg2.extras.execute_values( + self.cursor, + f""" + LOCK TABLE ONLY {relation}; + INSERT INTO {relation} + SELECT {src}.id, {dst}.id + FROM (VALUES %s) AS V(src, dst) + INNER JOIN {src} on ({src}.sha1=V.src) + INNER JOIN {dst} on ({dst}.sha1=V.dst) + """, + data, + ) data.clear() diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py --- a/swh/provenance/provenance.py +++ b/swh/provenance/provenance.py @@ -159,7 +159,7 @@ self.cache = new_cache() def commit(self): - # TODO: for now we just forward the write_cache. This should be improved! + # TODO: for now we just forward the cache. This should be improved! while not self.storage.commit(self.cache, raise_on_commit=self.raise_on_commit): logging.warning( f"Unable to commit cached information {self.write_cache}. Retrying..." diff --git a/swh/provenance/sql/30-schema.sql b/swh/provenance/sql/30-schema.sql --- a/swh/provenance/sql/30-schema.sql +++ b/swh/provenance/sql/30-schema.sql @@ -2,9 +2,9 @@ create table dbversion ( - version int primary key, - release timestamptz, - description text + version int primary key, + release timestamptz, + description text ); comment on table dbversion is 'Details of current db version'; @@ -14,7 +14,7 @@ -- latest schema version insert into dbversion(version, release, description) - values(1, now(), 'Work In Progress'); + values(1, now(), 'Work In Progress'); -- a Git object ID, i.e., a Git-style salted SHA1 checksum create domain sha1_git as bytea check (length(value) = 20); @@ -77,7 +77,7 @@ ( content bigint not null, -- internal identifier of the content blob revision bigint not null, -- internal identifier of the revision where the blob appears for the first time - location bigint -- location of the content relative to the revision root directory + location bigint -- location of the content relative to the revision root directory -- foreign key (blob) references content (id), -- foreign key (rev) references revision (id), -- foreign key (loc) references location (id)