diff --git a/docs/index.rst b/docs/index.rst --- a/docs/index.rst +++ b/docs/index.rst @@ -66,7 +66,7 @@ # config.yaml provenance: storage: - cls: local + cls: postgresql db: host: /tmp/tmpifn2ov_j port: 9824 diff --git a/swh/provenance/__init__.py b/swh/provenance/__init__.py --- a/swh/provenance/__init__.py +++ b/swh/provenance/__init__.py @@ -6,6 +6,7 @@ from __future__ import annotations from typing import TYPE_CHECKING +import warnings if TYPE_CHECKING: from .archive import ArchiveInterface @@ -70,14 +71,21 @@ Raises: :cls:`ValueError` if passed an unknown archive class. """ - if cls == "local": + if cls in ["local", "postgresql"]: from swh.core.db import BaseDb - from .postgresql.provenancedb import ProvenanceDB + from .postgresql.provenance import ProvenanceStoragePostgreSql + + if cls == "local": + warnings.warn( + '"local" class is deprecated for provenance storage, please ' + 'use "postgresql" class instead.', + DeprecationWarning, + ) conn = BaseDb.connect(**kwargs["db"]).conn raise_on_commit = kwargs.get("raise_on_commit", False) - return ProvenanceDB(conn, raise_on_commit) + return ProvenanceStoragePostgreSql(conn, raise_on_commit) elif cls == "remote": from .api.client import RemoteProvenanceStorage diff --git a/swh/provenance/api/server.py b/swh/provenance/api/server.py --- a/swh/provenance/api/server.py +++ b/swh/provenance/api/server.py @@ -114,9 +114,9 @@ if type == "local": cls = scfg.get("cls") - if cls != "local": + if cls != "postgresql": raise ValueError( - "The provenance backend can only be started with a 'local' " + "The provenance backend can only be started with a 'postgresql' " "configuration" ) diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py --- a/swh/provenance/cli.py +++ b/swh/provenance/cli.py @@ -39,7 +39,7 @@ }, }, "storage": { - "cls": "local", + "cls": "postgresql", "db": {"host": "localhost", "dbname": "provenance"}, }, } diff --git a/swh/provenance/postgresql/provenancedb.py b/swh/provenance/postgresql/provenance.py rename from swh/provenance/postgresql/provenancedb.py rename to swh/provenance/postgresql/provenance.py --- a/swh/provenance/postgresql/provenancedb.py +++ b/swh/provenance/postgresql/provenance.py @@ -24,7 +24,7 @@ ) -class ProvenanceDB: +class ProvenanceStoragePostgreSql: def __init__( self, conn: psycopg2.extensions.connection, raise_on_commit: bool = False ) -> None: @@ -98,6 +98,7 @@ urls: Dict[Sha1Git, str] = {} sha1s = tuple(ids) if sha1s: + # TODO: consider splitting this query in several ones if sha1s is too big! values = ", ".join(itertools.repeat("%s", len(sha1s))) sql = f""" SELECT sha1, url @@ -151,6 +152,7 @@ result: Dict[Sha1Git, RevisionData] = {} sha1s = tuple(ids) if sha1s: + # TODO: consider splitting this query in several ones if sha1s is too big! values = ", ".join(itertools.repeat("%s", len(sha1s))) sql = f""" SELECT sha1, date, origin @@ -230,10 +232,11 @@ sql_l.append( f"""ON CONFLICT ({src}) DO UPDATE SET {dst}=ARRAY( - SELECT UNNEST({table}.{dst} || excluded.{dst})), - location=ARRAY( - SELECT UNNEST({relation.value}.location || excluded.location)) - """ + SELECT UNNEST({table}.{dst} || EXCLUDED.{dst}) + ), location=ARRAY( + SELECT UNNEST({relation.value}.location || EXCLUDED.location) + ) + """ ) else: sql_l.append("ON CONFLICT DO NOTHING") @@ -263,6 +266,7 @@ dates: Dict[Sha1Git, datetime] = {} sha1s = tuple(ids) if sha1s: + # TODO: consider splitting this query in several ones if sha1s is too big! values = ", ".join(itertools.repeat("%s", len(sha1s))) sql = f""" SELECT sha1, date diff --git a/swh/provenance/sql/40-funcs.sql b/swh/provenance/sql/40-funcs.sql --- a/swh/provenance/sql/40-funcs.sql +++ b/swh/provenance/sql/40-funcs.sql @@ -28,10 +28,10 @@ L.path as path from content as C inner join content_in_revision as CR on (CR.content = C.id) - inner join location as L on (CR.location = L.id) - inner join revision as R on (CR.revision = R.id) - left join origin as O on (R.origin=O.id) - where C.sha1=content_id + inner join location as L on (L.id = CR.location) + inner join revision as R on (R.id = CR.revision) + left join origin as O on (O.id = R.origin) + where C.sha1 = content_id order by date, revision, origin, path asc limit 1 $$; @@ -47,34 +47,34 @@ stable as $$ (select C.sha1 as content, - r.sha1 as revision, - r.date as date, + R.sha1 as revision, + R.date as date, O.url as origin, - l.path as path - from content as c - inner join content_in_revision as cr on (cr.content = c.id) - inner join location as l on (cr.location = l.id) - inner join revision as r on (cr.revision = r.id) - left join origin AS O on (R.origin=O.id) - where c.sha1=content_id) + L.path as path + from content as C + inner join content_in_revision as CR on (CR.content = C.id) + inner join location as L on (L.id = CR.location) + inner join revision as R on (R.id = CR.revision) + left join origin as O on (O.id = R.origin) + where C.sha1 = content_id) union - (select c.sha1 as content, - r.sha1 as revision, - r.date as date, + (select C.sha1 as content, + R.sha1 as revision, + R.date as date, O.url as origin, - case dirloc.path - when '' then cntloc.path - when '.' then cntloc.path - else (dirloc.path || '/' || cntloc.path)::unix_path + case DL.path + when '' then CL.path + when '.' then CL.path + else (DL.path || '/' || CL.path)::unix_path end as path - from content as c - inner join content_in_directory as cd on (c.id = cd.content) - inner join directory_in_revision as dr on (cd.directory = dr.directory) - inner join revision as r on (dr.revision = r.id) - inner join location as cntloc on (cd.location = cntloc.id) - inner join location as dirloc on (dr.location = dirloc.id) - left join origin as O on (R.origin=O.id) - where C.sha1=content_id) + from content as C + inner join content_in_directory as CD on (CD.content = C.id) + inner join directory_in_revision as DR on (DR.directory = CD.directory) + inner join revision as R on (R.id = DR.revision) + inner join location as CL on (CL.id = CD.location) + inner join location as DL on (DL.id = DR.location) + left join origin as O on (O.id = R.origin) + where C.sha1 = content_id) order by date, revision, origin, path limit early_cut $$; @@ -100,9 +100,9 @@ '\x'::unix_path as path from content as C inner join content_in_revision as CR on (CR.content = C.id) - inner join revision as R on (CR.revision = R.id) - left join origin as O on (R.origin=O.id) - where C.sha1=content_id + inner join revision as R on (R.id = CR.revision) + left join origin as O on (O.id = R.origin) + where C.sha1 = content_id order by date, revision, origin asc limit 1 $$; @@ -118,27 +118,27 @@ stable as $$ (select C.sha1 as content, - r.sha1 as revision, - r.date as date, + R.sha1 as revision, + R.date as date, O.url as origin, '\x'::unix_path as path - from content as c - inner join content_in_revision as cr on (cr.content = c.id) - inner join revision as r on (cr.revision = r.id) - left join origin as O on (R.origin=O.id) - where c.sha1=content_id) + from content as C + inner join content_in_revision as CR on (CR.content = C.id) + inner join revision as R on (R.id = CR.revision) + left join origin as O on (O.id = R.origin) + where C.sha1 = content_id) union - (select c.sha1 as content, - r.sha1 as revision, - r.date as date, + (select C.sha1 as content, + R.sha1 as revision, + R.date as date, O.url as origin, '\x'::unix_path as path - from content as c - inner join content_in_directory as cd on (c.id = cd.content) - inner join directory_in_revision as dr on (cd.directory = dr.directory) - inner join revision as r on (dr.revision = r.id) - left join origin as O on (R.origin=O.id) - where C.sha1=content_id) + from content as C + inner join content_in_directory as CD on (CD.content = C.id) + inner join directory_in_revision as DR on (DR.directory = CD.directory) + inner join revision as R on (R.id = DR.revision) + left join origin as O on (O.id = R.origin) + where C.sha1 = content_id) order by date, revision, origin, path limit early_cut $$; @@ -164,22 +164,22 @@ language sql stable as $$ - select C_L.sha1 as content, + select CL.sha1 as content, R.sha1 as revision, R.date as date, O.url as origin, L.path as path from ( select C.sha1 as sha1, - unnest(revision) as revision, - unnest(location) as location - from content_in_revision as C_R - inner join content as C on (C.id=C_R.content) - where C.sha1=content_id - ) as C_L - inner join revision as R on (R.id=C_L.revision) - inner join location as L on (L.id=C_L.location) - left join origin as O on (R.origin=O.id) + unnest(CR.revision) as revision, + unnest(CR.location) as location + from content_in_revision as CR + inner join content as C on (C.id = CR.content) + where C.sha1 = content_id + ) as CL + inner join revision as R on (R.id = CL.revision) + inner join location as L on (L.id = CL.location) + left join origin as O on (O.id = R.origin) order by date, revision, origin, path asc limit 1 $$; @@ -194,55 +194,55 @@ language sql stable as $$ - (with cnt as ( - select c.sha1 as sha1, - unnest(c_r.revision) as revision, - unnest(c_r.location) as location - from content_in_revision as c_r - inner join content as c on (c.id = c_r.content) - where c.sha1 = content_id + (with cntrev as ( + select C.sha1 as sha1, + unnest(CR.revision) as revision, + unnest(CR.location) as location + from content_in_revision as CR + inner join content as C on (C.id = CR.content) + where C.sha1 = content_id ) - select cnt.sha1 as content, - r.sha1 as revision, - r.date as date, + select CR.sha1 as content, + R.sha1 as revision, + R.date as date, O.url as origin, - l.path as path - from cnt - inner join revision as r on (r.id = cnt.revision) - inner join location as l on (l.id = cnt.location) - left join origin as O on (R.origin=O.id) + L.path as path + from cntrev as CR + inner join revision as R on (R.id = CR.revision) + inner join location as L on (L.id = CR.location) + left join origin as O on (O.id = R.origin) ) union - (with cnt as ( - select c.sha1 as content_sha1, - unnest(cd.directory) as directory, - unnest(cd.location) as location - from content as c - inner join content_in_directory as cd on (cd.content = c.id) - where c.sha1 = content_id + (with cntdir as ( + select C.sha1 as sha1, + unnest(CD.directory) as directory, + unnest(CD.location) as location + from content as C + inner join content_in_directory as CD on (CD.content = C.id) + where C.sha1 = content_id ), - cntdir as ( - select cnt.content_sha1 as content_sha1, - cntloc.path as file_path, - unnest(dr.revision) as revision, - unnest(dr.location) as prefix_location - from cnt - inner join directory_in_revision as dr on (dr.directory = cnt.directory) - inner join location as cntloc on (cntloc.id = cnt.location) + cntrev as ( + select CD.sha1 as sha1, + L.path as path, + unnest(DR.revision) as revision, + unnest(DR.location) as prefix + from cntdir as CD + inner join directory_in_revision as DR on (DR.directory = CD.directory) + inner join location as L on (L.id = CD.location) ) - select cntdir.content_sha1 as content, - r.sha1 as revision, - r.date as date, + select CR.sha1 as content, + R.sha1 as revision, + R.date as date, O.url as origin, - case dirloc.path - when '' then cntdir.file_path - when '.' then cntdir.file_path - else (dirloc.path || '/' || cntdir.file_path)::unix_path + case DL.path + when '' then CR.path + when '.' then CR.path + else (DL.path || '/' || CR.path)::unix_path end as path - from cntdir - inner join location as dirloc on (cntdir.prefix_location = dirloc.id) - inner join revision as r on (cntdir.revision = r.id) - left join origin as O on (R.origin=O.id) + from cntrev as CR + inner join revision as R on (R.id = CR.revision) + inner join location as DL on (DL.id = CR.prefix) + left join origin as O on (O.id = R.origin) ) order by date, revision, origin, path limit early_cut $$; @@ -262,19 +262,19 @@ language sql stable as $$ - select C_L.sha1 as content, + select CL.sha1 as content, R.sha1 as revision, R.date as date, O.url as origin, '\x'::unix_path as path from ( select C.sha1, unnest(revision) as revision - from content_in_revision as C_R - inner join content as C on (C.id=C_R.content) + from content_in_revision as CR + inner join content as C on (C.id = CR.content) where C.sha1=content_id - ) as C_L - inner join revision as R on (R.id=C_L.revision) - left join origin as O on (R.origin=O.id) + ) as CL + inner join revision as R on (R.id = CL.revision) + left join origin as O on (O.id = R.origin) order by date, revision, origin, path asc limit 1 $$; @@ -290,44 +290,44 @@ language sql stable as $$ - (with cnt as ( - select c.sha1 as sha1, - unnest(c_r.revision) as revision - from content_in_revision as c_r - inner join content as c on (c.id = c_r.content) - where c.sha1 = content_id + (with cntrev as ( + select C.sha1 as sha1, + unnest(CR.revision) as revision + from content_in_revision as CR + inner join content as C on (C.id = CR.content) + where C.sha1 = content_id ) - select cnt.sha1 as content, - r.sha1 as revision, - r.date as date, + select CR.sha1 as content, + R.sha1 as revision, + R.date as date, O.url as origin, '\x'::unix_path as path - from cnt - inner join revision as r on (r.id = cnt.revision) - left join origin as O on (r.origin=O.id) + from cntrev as CR + inner join revision as R on (R.id = CR.revision) + left join origin as O on (O.id = R.origin) ) union - (with cnt as ( - select c.sha1 as content_sha1, - unnest(cd.directory) as directory - from content as c - inner join content_in_directory as cd on (cd.content = c.id) - where c.sha1 = content_id + (with cntdir as ( + select C.sha1 as sha1, + unnest(CD.directory) as directory + from content as C + inner join content_in_directory as CD on (CD.content = C.id) + where C.sha1 = content_id ), - cntdir as ( - select cnt.content_sha1 as content_sha1, - unnest(dr.revision) as revision - from cnt - inner join directory_in_revision as dr on (dr.directory = cnt.directory) + cntrev as ( + select CD.sha1 as sha1, + unnest(DR.revision) as revision + from cntdir as CD + inner join directory_in_revision as DR on (DR.directory = CD.directory) ) - select cntdir.content_sha1 as content, - r.sha1 as revision, - r.date as date, + select CR.sha1 as content, + R.sha1 as revision, + R.date as date, O.url as origin, '\x'::unix_path as path - from cntdir - inner join revision as r on (cntdir.revision = r.id) - left join origin as O on (r.origin=O.id) + from cntrev as CR + inner join revision as R on (R.id = CR.revision) + left join origin as O on (O.id = R.origin) ) order by date, revision, origin, path limit early_cut $$; diff --git a/swh/provenance/tests/conftest.py b/swh/provenance/tests/conftest.py --- a/swh/provenance/tests/conftest.py +++ b/swh/provenance/tests/conftest.py @@ -49,7 +49,7 @@ @pytest.fixture def app(populated_db: Dict[str, str]) -> Iterator[server.ProvenanceStorageServerApp]: assert hasattr(server, "storage") - server.storage = get_provenance_storage(cls="local", db=populated_db) + server.storage = get_provenance_storage(cls="postgresql", db=populated_db) yield server.app @@ -59,7 +59,7 @@ return RemoteProvenanceStorage -@pytest.fixture(params=["local", "remote"]) +@pytest.fixture(params=["postgresql", "remote"]) def provenance_storage( request: SubRequest, populated_db: Dict[str, str], @@ -94,7 +94,9 @@ ) # in test sessions, we DO want to raise any exception occurring at commit time return get_provenance( - cls="local", db=provenance_postgresql.get_dsn_parameters(), raise_on_commit=True + cls="postgresql", + db=provenance_postgresql.get_dsn_parameters(), + raise_on_commit=True, ) diff --git a/swh/provenance/tests/test_conftest.py b/swh/provenance/tests/test_conftest.py --- a/swh/provenance/tests/test_conftest.py +++ b/swh/provenance/tests/test_conftest.py @@ -9,7 +9,8 @@ def test_provenance_fixture(provenance: ProvenanceInterface) -> None: - """Check the 'provenance' fixture produce a working ProvenanceDB object""" + """Check the 'provenance' fixture produce a working + ProvenanceStoragePostgreSql object""" assert provenance provenance.flush() # should be a noop diff --git a/swh/provenance/tests/test_provenance_db.py b/swh/provenance/tests/test_provenance_db.py --- a/swh/provenance/tests/test_provenance_db.py +++ b/swh/provenance/tests/test_provenance_db.py @@ -4,11 +4,11 @@ # See top-level LICENSE file for more information from swh.provenance.interface import ProvenanceInterface -from swh.provenance.postgresql.provenancedb import ProvenanceDB +from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql def test_provenance_flavor(provenance: ProvenanceInterface) -> None: - if isinstance(provenance.storage, ProvenanceDB): + if isinstance(provenance.storage, ProvenanceStoragePostgreSql): assert provenance.storage.flavor in ( "with-path", "without-path",