Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/postgresql/provenancedb_base.py
Show First 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | def commit(self, data: Dict[str, Any], raise_on_commit: bool = False) -> bool: | ||||||||||
sha1: data["origin"]["data"][sha1] | sha1: data["origin"]["data"][sha1] | ||||||||||
for sha1 in data["origin"]["added"] | for sha1 in data["origin"]["added"] | ||||||||||
}, | }, | ||||||||||
) | ) | ||||||||||
data["origin"]["data"].clear() | data["origin"]["data"].clear() | ||||||||||
data["origin"]["added"].clear() | data["origin"]["added"].clear() | ||||||||||
# Insert relations from the origin-revision layer | # Insert relations from the origin-revision layer | ||||||||||
self.insert_origin_head(data["revision_in_origin"]) | |||||||||||
self.insert_revision_history(data["revision_before_revision"]) | self.insert_revision_history(data["revision_before_revision"]) | ||||||||||
self.insert_origin_head(data["revision_in_origin"]) | |||||||||||
# Update preferred origins | # Update preferred origins | ||||||||||
self.update_preferred_origin( | self.update_preferred_origin( | ||||||||||
{ | { | ||||||||||
sha1: data["revision_origin"]["data"][sha1] | sha1: data["revision_origin"]["data"][sha1] | ||||||||||
for sha1 in data["revision_origin"]["added"] | for sha1 in data["revision_origin"]["added"] | ||||||||||
} | } | ||||||||||
) | ) | ||||||||||
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines | def insert_origin(self, data: Dict[Sha1Git, str]): | ||||||||||
data.items(), | data.items(), | ||||||||||
) | ) | ||||||||||
# XXX: not sure if Python takes a reference or a copy. | # XXX: not sure if Python takes a reference or a copy. | ||||||||||
# This might be useless! | # This might be useless! | ||||||||||
data.clear() | data.clear() | ||||||||||
def insert_origin_head(self, data: Set[Tuple[Sha1Git, Sha1Git]]): | def insert_origin_head(self, data: Set[Tuple[Sha1Git, Sha1Git]]): | ||||||||||
if data: | if data: | ||||||||||
# Insert revisions first, to ensure "foreign keys" exist | |||||||||||
# Origins are assumed to be already inserted (they require knowing the url) | |||||||||||
psycopg2.extras.execute_values( | |||||||||||
self.cursor, | |||||||||||
""" | |||||||||||
douardda: unless I'm wrong, the locking of the table in this very case is not needed, thanks to the `on… | |||||||||||
Done Inline ActionsNot really sure, but these locks will be reworked soon so I don't think it is worth worrying about them right now aeviso: Not really sure, but these locks will be reworked soon so I don't think it is worth worrying about… | |||||||||||
LOCK TABLE ONLY revision; | |||||||||||
INSERT INTO revision(sha1) VALUES %s | |||||||||||
ON CONFLICT DO NOTHING | |||||||||||
""", | |||||||||||
Done Inline Actions
nitpick vlorentz: nitpick | |||||||||||
Done Inline ActionsAgreed. I missed this one. Thanks! aeviso: Agreed. I missed this one. Thanks! | |||||||||||
Done Inline ActionsI've realized this set is actually there to avoid trying to insert the same revision twice (the input is a set of tuples, not a dictionary in this case) aeviso: I've realized this `set` is actually there to avoid trying to insert the same revision twice… | |||||||||||
Done Inline Actionsbeware: the form {x for x in stuff} is a set (built by comprehension). The dict would look like {x: "foo" for x in stuff} douardda: beware: the form
`{x for x in stuff}`
is a set (built by comprehension). The dict would… | |||||||||||
Done Inline ActionsI don't follow. There is no dict here aeviso: I don't follow. There is no dict here | |||||||||||
Done Inline Actionsset((rev,) for rev, _ in data) and {(rev,) for rev, _ in data} do exactly the same thing: building a set; which does always deduplicate. vlorentz: `set((rev,) for rev, _ in data)` and `{(rev,) for rev, _ in data}` do exactly the same thing… | |||||||||||
Done Inline ActionsSure, but this code is replaced in the next few commits, so no point rebasing for such a minor detail. aeviso: Sure, but this code is replaced in the next few commits, so no point rebasing for such a minor… | |||||||||||
{(rev,) for rev, _ in data}, | |||||||||||
) | |||||||||||
psycopg2.extras.execute_values( | psycopg2.extras.execute_values( | ||||||||||
self.cursor, | self.cursor, | ||||||||||
# XXX: not clear how conflicts are handled here! | # XXX: not clear how conflicts are handled here! | ||||||||||
""" | """ | ||||||||||
LOCK TABLE ONLY revision_in_origin; | LOCK TABLE ONLY revision_in_origin; | ||||||||||
INSERT INTO revision_in_origin | INSERT INTO revision_in_origin | ||||||||||
SELECT R.id, O.id | SELECT R.id, O.id | ||||||||||
FROM (VALUES %s) AS V(rev, org) | FROM (VALUES %s) AS V(rev, org) | ||||||||||
INNER JOIN revision AS R on (R.sha1=V.rev) | INNER JOIN revision AS R on (R.sha1=V.rev) | ||||||||||
INNER JOIN origin AS O on (O.sha1=V.org) | INNER JOIN origin AS O on (O.sha1=V.org) | ||||||||||
ON CONFLICT DO NOTHING | |||||||||||
""", | """, | ||||||||||
data, | data, | ||||||||||
) | ) | ||||||||||
data.clear() | data.clear() | ||||||||||
def insert_relation(self, relation: str, data: Set[Tuple[Sha1Git, Sha1Git, bytes]]): | def insert_relation(self, relation: str, data: Set[Tuple[Sha1Git, Sha1Git, bytes]]): | ||||||||||
... | ... | ||||||||||
def insert_revision_history(self, data: Dict[Sha1Git, Sha1Git]): | def insert_revision_history(self, data: Dict[Sha1Git, Set[Sha1Git]]): | ||||||||||
if data: | if data: | ||||||||||
# print(f"Inserting histories: {data}") | |||||||||||
# Insert revisions first, to ensure "foreign keys" exist | |||||||||||
revisions = set(data) | |||||||||||
for rev in data: | |||||||||||
revisions.update(data[rev]) | |||||||||||
psycopg2.extras.execute_values( | |||||||||||
self.cursor, | |||||||||||
""" | |||||||||||
LOCK TABLE ONLY revision; | |||||||||||
INSERT INTO revision(sha1) VALUES %s | |||||||||||
ON CONFLICT DO NOTHING | |||||||||||
""", | |||||||||||
((rev,) for rev in revisions), | |||||||||||
) | |||||||||||
values = [[(prev, next) for next in data[prev]] for prev in data] | values = [[(prev, next) for next in data[prev]] for prev in data] | ||||||||||
psycopg2.extras.execute_values( | psycopg2.extras.execute_values( | ||||||||||
self.cursor, | self.cursor, | ||||||||||
# XXX: not clear how conflicts are handled here! | # XXX: not clear how conflicts are handled here! | ||||||||||
""" | """ | ||||||||||
LOCK TABLE ONLY revision_before_revision; | LOCK TABLE ONLY revision_before_revision; | ||||||||||
INSERT INTO revision_before_revision | INSERT INTO revision_before_revision | ||||||||||
SELECT P.id, N.id | SELECT P.id, N.id | ||||||||||
FROM (VALUES %s) AS V(prev, next) | FROM (VALUES %s) AS V(prev, next) | ||||||||||
INNER JOIN revision AS P on (P.sha1=V.prev) | INNER JOIN revision AS P on (P.sha1=V.prev) | ||||||||||
INNER JOIN revision AS N on (N.sha1=V.next) | INNER JOIN revision AS N on (N.sha1=V.next) | ||||||||||
ON CONFLICT DO NOTHING | |||||||||||
""", | """, | ||||||||||
tuple(sum(values, [])), | sum(values, []), | ||||||||||
) | ) | ||||||||||
data.clear() | data.clear() | ||||||||||
def revision_get_preferred_origin(self, revision: Sha1Git) -> Optional[Sha1Git]: | def revision_get_preferred_origin(self, revision: Sha1Git) -> Optional[Sha1Git]: | ||||||||||
self.cursor.execute( | self.cursor.execute( | ||||||||||
""" | """ | ||||||||||
SELECT O.sha1 | SELECT O.sha1 | ||||||||||
FROM revision AS R | FROM revision AS R | ||||||||||
▲ Show 20 Lines • Show All 50 Lines • Show Last 20 Lines |
Unless I'm wrong, locking the table is not needed in this particular case, thanks to the `ON CONFLICT DO NOTHING` clause.
If that is true, some of the other table locks can safely be removed as well.
@olasd, am I right?