Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9341044
D5943.id21367.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D5943.id21367.diff
View Options
diff --git a/swh/provenance/postgresql/provenancedb_base.py b/swh/provenance/postgresql/provenancedb_base.py
--- a/swh/provenance/postgresql/provenancedb_base.py
+++ b/swh/provenance/postgresql/provenancedb_base.py
@@ -64,8 +64,8 @@
data["origin"]["added"].clear()
# Insert relations from the origin-revision layer
- self.insert_origin_head(data["revision_in_origin"])
self.insert_revision_history(data["revision_before_revision"])
+ self.insert_origin_head(data["revision_in_origin"])
# Update preferred origins
self.update_preferred_origin(
@@ -141,16 +141,29 @@
def insert_origin_head(self, data: Set[Tuple[Sha1Git, Sha1Git]]):
if data:
+ # Insert revisions first, to ensure "foreign keys" exist
+ # Origins are assumed to be already inserted (they require knowing the URL)
+ psycopg2.extras.execute_values(
+ self.cursor,
+ """
+ LOCK TABLE ONLY revision;
+ INSERT INTO revision(sha1) VALUES %s
+ ON CONFLICT DO NOTHING
+ """,
+ set((rev,) for rev, _ in data),
+ )
+
psycopg2.extras.execute_values(
self.cursor,
# XXX: not clear how conflicts are handled here!
"""
LOCK TABLE ONLY revision_in_origin;
INSERT INTO revision_in_origin
- SELECT R.id, O.id
- FROM (VALUES %s) AS V(rev, org)
- INNER JOIN revision AS R on (R.sha1=V.rev)
- INNER JOIN origin AS O on (O.sha1=V.org)
+ SELECT R.id, O.id
+ FROM (VALUES %s) AS V(rev, org)
+ INNER JOIN revision AS R on (R.sha1=V.rev)
+ INNER JOIN origin AS O on (O.sha1=V.org)
+ ON CONFLICT DO NOTHING
""",
data,
)
@@ -159,8 +172,23 @@
def insert_relation(self, relation: str, data: Set[Tuple[Sha1Git, Sha1Git, bytes]]):
...
- def insert_revision_history(self, data: Dict[Sha1Git, Sha1Git]):
+ def insert_revision_history(self, data: Dict[Sha1Git, Set[Sha1Git]]):
if data:
+ # print(f"Inserting histories: {data}")
+ # Insert revisions first, to ensure "foreign keys" exist
+ revisions = set(data)
+ for rev in data:
+ revisions.update(data[rev])
+ psycopg2.extras.execute_values(
+ self.cursor,
+ """
+ LOCK TABLE ONLY revision;
+ INSERT INTO revision(sha1) VALUES %s
+ ON CONFLICT DO NOTHING
+ """,
+ ((rev,) for rev in revisions),
+ )
+
values = [[(prev, next) for next in data[prev]] for prev in data]
psycopg2.extras.execute_values(
self.cursor,
@@ -168,12 +196,13 @@
"""
LOCK TABLE ONLY revision_before_revision;
INSERT INTO revision_before_revision
- SELECT P.id, N.id
- FROM (VALUES %s) AS V(prev, next)
- INNER JOIN revision AS P on (P.sha1=V.prev)
- INNER JOIN revision AS N on (N.sha1=V.next)
+ SELECT P.id, N.id
+ FROM (VALUES %s) AS V(prev, next)
+ INNER JOIN revision AS P on (P.sha1=V.prev)
+ INNER JOIN revision AS N on (N.sha1=V.next)
+ ON CONFLICT DO NOTHING
""",
- tuple(sum(values, [])),
+ sum(values, []),
)
data.clear()
diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py
--- a/swh/provenance/provenance.py
+++ b/swh/provenance/provenance.py
@@ -109,7 +109,7 @@
class DatetimeCache(TypedDict):
- data: Dict[Sha1Git, datetime]
+ data: Dict[Sha1Git, Optional[datetime]]
added: Set[Sha1Git]
@@ -250,15 +250,17 @@
missing_ids = set(id for id in ids if id not in cache)
if missing_ids:
cache["data"].update(self.storage.get_dates(entity, list(missing_ids)))
- return {sha1: cache["data"][sha1] for sha1 in ids if sha1 in cache["data"]}
+ return {
+ sha1: date
+ for sha1, date in cache["data"].items()
+ if sha1 in ids and date is not None
+ }
def origin_add(self, origin: OriginEntry) -> None:
self.cache["origin"]["data"][origin.id] = origin.url
self.cache["origin"]["added"].add(origin.id)
def revision_add(self, revision: RevisionEntry):
- # Add current revision to the compact DB
- assert revision.date is not None
self.cache["revision"]["data"][revision.id] = revision.date
self.cache["revision"]["added"].add(revision.id)
diff --git a/swh/provenance/sql/30-schema.sql b/swh/provenance/sql/30-schema.sql
--- a/swh/provenance/sql/30-schema.sql
+++ b/swh/provenance/sql/30-schema.sql
@@ -47,7 +47,7 @@
(
id bigserial primary key, -- internal identifier of the revision
sha1 sha1_git unique not null, -- intrinsic identifier of the revision
- date timestamptz not null, -- timestamp of the revision
+ date timestamptz, -- timestamp of the revision
origin bigint -- id of the preferred origin
-- foreign key (org) references origin (id)
);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 11:33 AM (3 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3232469
Attached To
D5943: Fix database queries related to the origin-revision layer
Event Timeline
Log In to Comment