Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/postgresql/provenancedb_with_path.py
from datetime import datetime | |||||
from typing import Generator, Optional, Set, Tuple | from typing import Generator, Optional, Set, Tuple | ||||
import psycopg2 | import psycopg2 | ||||
import psycopg2.extras | import psycopg2.extras | ||||
from swh.model.model import Sha1Git | from swh.model.model import Sha1Git | ||||
from ..provenance import ProvenanceResult | |||||
from .provenancedb_base import ProvenanceDBBase | from .provenancedb_base import ProvenanceDBBase | ||||
class ProvenanceWithPathDB(ProvenanceDBBase): | class ProvenanceWithPathDB(ProvenanceDBBase): | ||||
def content_find_first( | def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]: | ||||
self, id: Sha1Git | |||||
) -> Optional[Tuple[Sha1Git, Sha1Git, datetime, bytes]]: | |||||
self.cursor.execute( | self.cursor.execute( | ||||
""" | """ | ||||
SELECT C.sha1 AS blob, | SELECT C.sha1 AS blob, | ||||
R.sha1 AS rev, | R.sha1 AS rev, | ||||
R.date AS date, | R.date AS date, | ||||
O.url AS url, | |||||
L.path AS path | L.path AS path | ||||
FROM content AS C | FROM content AS C | ||||
INNER JOIN content_in_revision AS CR ON (CR.content = C.id) | INNER JOIN content_in_revision AS CR ON (CR.content=C.id) | ||||
INNER JOIN location as L ON (CR.location = L.id) | INNER JOIN location as L ON (CR.location=L.id) | ||||
INNER JOIN revision as R ON (CR.revision = R.id) | INNER JOIN revision as R ON (CR.revision=R.id) | ||||
LEFT JOIN origin as O ON (R.origin=O.id) | |||||
WHERE C.sha1=%s | WHERE C.sha1=%s | ||||
ORDER BY date, rev, path ASC LIMIT 1 | ORDER BY date, rev, url, path ASC LIMIT 1 | ||||
""", | """, | ||||
(id,), | (id,), | ||||
) | ) | ||||
return self.cursor.fetchone() | row = self.cursor.fetchone() | ||||
if row: | |||||
return ProvenanceResult( | |||||
content=row[0], revision=row[1], date=row[2], origin=row[3], path=row[4] | |||||
) | |||||
else: | |||||
return None | |||||
def content_find_all( | def content_find_all( | ||||
self, id: Sha1Git, limit: Optional[int] = None | self, id: Sha1Git, limit: Optional[int] = None | ||||
) -> Generator[Tuple[Sha1Git, Sha1Git, datetime, bytes], None, None]: | ) -> Generator[ProvenanceResult, None, None]: | ||||
early_cut = f"LIMIT {limit}" if limit is not None else "" | early_cut = f"LIMIT {limit}" if limit is not None else "" | ||||
self.cursor.execute( | self.cursor.execute( | ||||
f""" | f""" | ||||
(SELECT C.sha1 AS blob, | (SELECT C.sha1 AS blob, | ||||
R.sha1 AS rev, | R.sha1 AS rev, | ||||
R.date AS date, | R.date AS date, | ||||
O.url AS url, | |||||
L.path AS path | L.path AS path | ||||
FROM content AS C | FROM content AS C | ||||
INNER JOIN content_in_revision AS CR ON (CR.content = C.id) | INNER JOIN content_in_revision AS CR ON (CR.content=C.id) | ||||
INNER JOIN location AS L ON (CR.location = L.id) | INNER JOIN location AS L ON (CR.location=L.id) | ||||
INNER JOIN revision AS R ON (CR.revision = R.id) | INNER JOIN revision AS R ON (CR.revision=R.id) | ||||
LEFT JOIN origin as O ON (R.origin=O.id) | |||||
WHERE C.sha1=%s) | WHERE C.sha1=%s) | ||||
UNION | UNION | ||||
(SELECT C.sha1 AS content, | (SELECT C.sha1 AS content, | ||||
R.sha1 AS revision, | R.sha1 AS revision, | ||||
R.date AS date, | R.date AS date, | ||||
O.url AS url, | |||||
CASE DL.path | CASE DL.path | ||||
WHEN '' THEN CL.path | WHEN '' THEN CL.path | ||||
WHEN '.' THEN CL.path | WHEN '.' THEN CL.path | ||||
ELSE (DL.path || '/' || CL.path)::unix_path | ELSE (DL.path || '/' || CL.path)::unix_path | ||||
END AS path | END AS path | ||||
FROM content AS C | FROM content AS C | ||||
INNER JOIN content_in_directory AS CD ON (C.id = CD.content) | INNER JOIN content_in_directory AS CD ON (C.id=CD.content) | ||||
INNER JOIN directory_in_revision AS DR ON (CD.directory = DR.directory) | INNER JOIN directory_in_revision AS DR ON (CD.directory=DR.directory) | ||||
INNER JOIN revision AS R ON (DR.revision = R.id) | INNER JOIN revision AS R ON (DR.revision=R.id) | ||||
INNER JOIN location AS CL ON (CD.location = CL.id) | INNER JOIN location AS CL ON (CD.location=CL.id) | ||||
INNER JOIN location AS DL ON (DR.location = DL.id) | INNER JOIN location AS DL ON (DR.location=DL.id) | ||||
LEFT JOIN origin AS O ON (R.origin=O.id) | |||||
WHERE C.sha1=%s) | WHERE C.sha1=%s) | ||||
ORDER BY date, rev, path {early_cut} | ORDER BY date, rev, url, path {early_cut} | ||||
""", | """, | ||||
(id, id), | (id, id), | ||||
) | ) | ||||
yield from self.cursor.fetchall() | for row in self.cursor.fetchall(): | ||||
yield ProvenanceResult( | |||||
content=row[0], revision=row[1], date=row[2], origin=row[3], path=row[4] | |||||
vlorentz: more typo-proof IMO | |||||
Done Inline Actionsthe original idea was to use psycopg2.extras.DictCursor actually. But after since the refactoring is quite big I postponed that change and end up forgetting about it. Then we can simply do ProvenanceResult(**row) by setting the right aliases in the SQL query (ie url -> origin) aeviso: the original idea was to use `psycopg2.extras.DictCursor` actually. But after since the… | |||||
) | |||||
def insert_relation(self, relation: str, data: Set[Tuple[Sha1Git, Sha1Git, bytes]]): | def insert_relation(self, relation: str, data: Set[Tuple[Sha1Git, Sha1Git, bytes]]): | ||||
"""Insert entries in `relation` from `data` | """Insert entries in `relation` from `data` | ||||
Also insert missing location entries in the 'location' table. | Also insert missing location entries in the 'location' table. | ||||
""" | """ | ||||
if data: | if data: | ||||
assert relation in ( | assert relation in ( | ||||
Show All 31 Lines |
more typo-proof IMO