Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/postgresql/provenancedb_without_path.py
from typing import Generator, Optional, Set, Tuple | from typing import Generator, Optional | ||||
import psycopg2 | |||||
import psycopg2.extras | |||||
from swh.model.model import Sha1Git | from swh.model.model import Sha1Git | ||||
from ..provenance import ProvenanceResult | from ..provenance import ProvenanceResult, RelationType | ||||
from .provenancedb_base import ProvenanceDBBase | from .provenancedb_base import ProvenanceDBBase | ||||
class ProvenanceWithoutPathDB(ProvenanceDBBase): | class ProvenanceWithoutPathDB(ProvenanceDBBase): | ||||
def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]: | def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]: | ||||
self.cursor.execute( | sql = """ | ||||
""" | SELECT C.sha1 AS content, | ||||
SELECT C.sha1 AS blob, | R.sha1 AS revision, | ||||
R.sha1 AS rev, | |||||
R.date AS date, | R.date AS date, | ||||
O.url AS url, | O.url AS origin, | ||||
'\\x'::bytea as path | '\\x'::bytea as path | ||||
FROM content AS C | FROM content AS C | ||||
INNER JOIN content_in_revision AS CR ON (CR.content=C.id) | INNER JOIN content_in_revision AS CR ON (CR.content=C.id) | ||||
INNER JOIN revision as R ON (CR.revision=R.id) | INNER JOIN revision as R ON (CR.revision=R.id) | ||||
LEFT JOIN origin as O ON (R.origin=O.id) | LEFT JOIN origin as O ON (R.origin=O.id) | ||||
WHERE C.sha1=%s | WHERE C.sha1=%s | ||||
ORDER BY date, rev, url ASC LIMIT 1 | ORDER BY date, revision, origin ASC LIMIT 1 | ||||
""", | """ | ||||
(id,), | self.cursor.execute(sql, (id,)) | ||||
) | |||||
row = self.cursor.fetchone() | row = self.cursor.fetchone() | ||||
if row: | return ProvenanceResult(**row) if row is not None else None | ||||
return ProvenanceResult( | |||||
content=row[0], revision=row[1], date=row[2], origin=row[3], path=row[4] | |||||
) | |||||
else: | |||||
return None | |||||
def content_find_all( | def content_find_all( | ||||
self, id: Sha1Git, limit: Optional[int] = None | self, id: Sha1Git, limit: Optional[int] = None | ||||
) -> Generator[ProvenanceResult, None, None]: | ) -> Generator[ProvenanceResult, None, None]: | ||||
early_cut = f"LIMIT {limit}" if limit is not None else "" | early_cut = f"LIMIT {limit}" if limit is not None else "" | ||||
self.cursor.execute( | sql = f""" | ||||
f""" | (SELECT C.sha1 AS content, | ||||
(SELECT C.sha1 AS blob, | R.sha1 AS revision, | ||||
R.sha1 AS rev, | |||||
R.date AS date, | R.date AS date, | ||||
O.url AS url, | O.url AS origin, | ||||
'\\x'::bytea as path | '\\x'::bytea as path | ||||
FROM content AS C | FROM content AS C | ||||
INNER JOIN content_in_revision AS CR ON (CR.content=C.id) | INNER JOIN content_in_revision AS CR ON (CR.content=C.id) | ||||
INNER JOIN revision AS R ON (CR.revision=R.id) | INNER JOIN revision AS R ON (CR.revision=R.id) | ||||
LEFT JOIN origin as O ON (R.origin=O.id) | LEFT JOIN origin as O ON (R.origin=O.id) | ||||
WHERE C.sha1=%s) | WHERE C.sha1=%s) | ||||
UNION | UNION | ||||
(SELECT C.sha1 AS content, | (SELECT C.sha1 AS content, | ||||
R.sha1 AS revision, | R.sha1 AS revision, | ||||
R.date AS date, | R.date AS date, | ||||
O.url AS url, | O.url AS origin, | ||||
'\\x'::bytea as path | '\\x'::bytea as path | ||||
FROM content AS C | FROM content AS C | ||||
INNER JOIN content_in_directory AS CD ON (C.id=CD.content) | INNER JOIN content_in_directory AS CD ON (C.id=CD.content) | ||||
INNER JOIN directory_in_revision AS DR ON (CD.directory=DR.directory) | INNER JOIN directory_in_revision AS DR ON (CD.directory=DR.directory) | ||||
INNER JOIN revision AS R ON (DR.revision=R.id) | INNER JOIN revision AS R ON (DR.revision=R.id) | ||||
LEFT JOIN origin as O ON (R.origin=O.id) | LEFT JOIN origin as O ON (R.origin=O.id) | ||||
WHERE C.sha1=%s) | WHERE C.sha1=%s) | ||||
ORDER BY date, rev, url {early_cut} | ORDER BY date, revision, origin {early_cut} | ||||
""", | """ | ||||
(id, id), | self.cursor.execute(sql, (id, id)) | ||||
) | yield from (ProvenanceResult(**row) for row in self.cursor.fetchall()) | ||||
for row in self.cursor.fetchall(): | |||||
yield ProvenanceResult( | def _relation_uses_location_table(self, relation: RelationType) -> bool: | ||||
content=row[0], revision=row[1], date=row[2], origin=row[3], path=row[4] | return False | ||||
) | |||||
def insert_relation(self, relation: str, data: Set[Tuple[Sha1Git, Sha1Git, bytes]]): | |||||
if data: | |||||
assert relation in ( | |||||
"content_in_revision", | |||||
"content_in_directory", | |||||
"directory_in_revision", | |||||
) | |||||
src, dst = relation.split("_in_") | |||||
psycopg2.extras.execute_values( | |||||
self.cursor, | |||||
f""" | |||||
LOCK TABLE ONLY {relation}; | |||||
INSERT INTO {relation} | |||||
SELECT {src}.id, {dst}.id | |||||
FROM (VALUES %s) AS V(src, dst) | |||||
INNER JOIN {src} on ({src}.sha1=V.src) | |||||
INNER JOIN {dst} on ({dst}.sha1=V.dst) | |||||
""", | |||||
data, | |||||
) | |||||
data.clear() |