Page MenuHomeSoftware Heritage

D6084.id.diff
No OneTemporary

D6084.id.diff

diff --git a/docs/index.rst b/docs/index.rst
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -66,7 +66,7 @@
# config.yaml
provenance:
storage:
- cls: local
+ cls: postgresql
db:
host: /tmp/tmpifn2ov_j
port: 9824
diff --git a/swh/provenance/__init__.py b/swh/provenance/__init__.py
--- a/swh/provenance/__init__.py
+++ b/swh/provenance/__init__.py
@@ -6,6 +6,7 @@
from __future__ import annotations
from typing import TYPE_CHECKING
+import warnings
if TYPE_CHECKING:
from .archive import ArchiveInterface
@@ -70,14 +71,21 @@
Raises:
:cls:`ValueError` if passed an unknown archive class.
"""
- if cls == "local":
+ if cls in ["local", "postgresql"]:
from swh.core.db import BaseDb
- from .postgresql.provenancedb import ProvenanceDB
+ from .postgresql.provenance import ProvenanceStoragePostgreSql
+
+ if cls == "local":
+ warnings.warn(
+ '"local" class is deprecated for provenance storage, please '
+ 'use "postgresql" class instead.',
+ DeprecationWarning,
+ )
conn = BaseDb.connect(**kwargs["db"]).conn
raise_on_commit = kwargs.get("raise_on_commit", False)
- return ProvenanceDB(conn, raise_on_commit)
+ return ProvenanceStoragePostgreSql(conn, raise_on_commit)
elif cls == "remote":
from .api.client import RemoteProvenanceStorage
diff --git a/swh/provenance/api/server.py b/swh/provenance/api/server.py
--- a/swh/provenance/api/server.py
+++ b/swh/provenance/api/server.py
@@ -114,9 +114,9 @@
if type == "local":
cls = scfg.get("cls")
- if cls != "local":
+ if cls != "postgresql":
raise ValueError(
- "The provenance backend can only be started with a 'local' "
+ "The provenance backend can only be started with a 'postgresql' "
"configuration"
)
diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py
--- a/swh/provenance/cli.py
+++ b/swh/provenance/cli.py
@@ -39,7 +39,7 @@
},
},
"storage": {
- "cls": "local",
+ "cls": "postgresql",
"db": {"host": "localhost", "dbname": "provenance"},
},
}
diff --git a/swh/provenance/postgresql/provenancedb.py b/swh/provenance/postgresql/provenance.py
rename from swh/provenance/postgresql/provenancedb.py
rename to swh/provenance/postgresql/provenance.py
--- a/swh/provenance/postgresql/provenancedb.py
+++ b/swh/provenance/postgresql/provenance.py
@@ -24,7 +24,7 @@
)
-class ProvenanceDB:
+class ProvenanceStoragePostgreSql:
def __init__(
self, conn: psycopg2.extensions.connection, raise_on_commit: bool = False
) -> None:
@@ -98,6 +98,7 @@
urls: Dict[Sha1Git, str] = {}
sha1s = tuple(ids)
if sha1s:
+ # TODO: consider splitting this query in several ones if sha1s is too big!
values = ", ".join(itertools.repeat("%s", len(sha1s)))
sql = f"""
SELECT sha1, url
@@ -151,6 +152,7 @@
result: Dict[Sha1Git, RevisionData] = {}
sha1s = tuple(ids)
if sha1s:
+ # TODO: consider splitting this query in several ones if sha1s is too big!
values = ", ".join(itertools.repeat("%s", len(sha1s)))
sql = f"""
SELECT sha1, date, origin
@@ -230,10 +232,11 @@
sql_l.append(
f"""ON CONFLICT ({src}) DO UPDATE
SET {dst}=ARRAY(
- SELECT UNNEST({table}.{dst} || excluded.{dst})),
- location=ARRAY(
- SELECT UNNEST({relation.value}.location || excluded.location))
- """
+ SELECT UNNEST({table}.{dst} || EXCLUDED.{dst})
+ ), location=ARRAY(
+ SELECT UNNEST({relation.value}.location || EXCLUDED.location)
+ )
+ """
)
else:
sql_l.append("ON CONFLICT DO NOTHING")
@@ -263,6 +266,7 @@
dates: Dict[Sha1Git, datetime] = {}
sha1s = tuple(ids)
if sha1s:
+ # TODO: consider splitting this query in several ones if sha1s is too big!
values = ", ".join(itertools.repeat("%s", len(sha1s)))
sql = f"""
SELECT sha1, date
diff --git a/swh/provenance/sql/40-funcs.sql b/swh/provenance/sql/40-funcs.sql
--- a/swh/provenance/sql/40-funcs.sql
+++ b/swh/provenance/sql/40-funcs.sql
@@ -28,10 +28,10 @@
L.path as path
from content as C
inner join content_in_revision as CR on (CR.content = C.id)
- inner join location as L on (CR.location = L.id)
- inner join revision as R on (CR.revision = R.id)
- left join origin as O on (R.origin=O.id)
- where C.sha1=content_id
+ inner join location as L on (L.id = CR.location)
+ inner join revision as R on (R.id = CR.revision)
+ left join origin as O on (O.id = R.origin)
+ where C.sha1 = content_id
order by date, revision, origin, path asc limit 1
$$;
@@ -47,34 +47,34 @@
stable
as $$
(select C.sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
- l.path as path
- from content as c
- inner join content_in_revision as cr on (cr.content = c.id)
- inner join location as l on (cr.location = l.id)
- inner join revision as r on (cr.revision = r.id)
- left join origin AS O on (R.origin=O.id)
- where c.sha1=content_id)
+ L.path as path
+ from content as C
+ inner join content_in_revision as CR on (CR.content = C.id)
+ inner join location as L on (L.id = CR.location)
+ inner join revision as R on (R.id = CR.revision)
+ left join origin as O on (O.id = R.origin)
+ where C.sha1 = content_id)
union
- (select c.sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ (select C.sha1 as content,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
- case dirloc.path
- when '' then cntloc.path
- when '.' then cntloc.path
- else (dirloc.path || '/' || cntloc.path)::unix_path
+ case DL.path
+ when '' then CL.path
+ when '.' then CL.path
+ else (DL.path || '/' || CL.path)::unix_path
end as path
- from content as c
- inner join content_in_directory as cd on (c.id = cd.content)
- inner join directory_in_revision as dr on (cd.directory = dr.directory)
- inner join revision as r on (dr.revision = r.id)
- inner join location as cntloc on (cd.location = cntloc.id)
- inner join location as dirloc on (dr.location = dirloc.id)
- left join origin as O on (R.origin=O.id)
- where C.sha1=content_id)
+ from content as C
+ inner join content_in_directory as CD on (CD.content = C.id)
+ inner join directory_in_revision as DR on (DR.directory = CD.directory)
+ inner join revision as R on (R.id = DR.revision)
+ inner join location as CL on (CL.id = CD.location)
+ inner join location as DL on (DL.id = DR.location)
+ left join origin as O on (O.id = R.origin)
+ where C.sha1 = content_id)
order by date, revision, origin, path limit early_cut
$$;
@@ -100,9 +100,9 @@
'\x'::unix_path as path
from content as C
inner join content_in_revision as CR on (CR.content = C.id)
- inner join revision as R on (CR.revision = R.id)
- left join origin as O on (R.origin=O.id)
- where C.sha1=content_id
+ inner join revision as R on (R.id = CR.revision)
+ left join origin as O on (O.id = R.origin)
+ where C.sha1 = content_id
order by date, revision, origin asc limit 1
$$;
@@ -118,27 +118,27 @@
stable
as $$
(select C.sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
'\x'::unix_path as path
- from content as c
- inner join content_in_revision as cr on (cr.content = c.id)
- inner join revision as r on (cr.revision = r.id)
- left join origin as O on (R.origin=O.id)
- where c.sha1=content_id)
+ from content as C
+ inner join content_in_revision as CR on (CR.content = C.id)
+ inner join revision as R on (R.id = CR.revision)
+ left join origin as O on (O.id = R.origin)
+ where C.sha1 = content_id)
union
- (select c.sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ (select C.sha1 as content,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
'\x'::unix_path as path
- from content as c
- inner join content_in_directory as cd on (c.id = cd.content)
- inner join directory_in_revision as dr on (cd.directory = dr.directory)
- inner join revision as r on (dr.revision = r.id)
- left join origin as O on (R.origin=O.id)
- where C.sha1=content_id)
+ from content as C
+ inner join content_in_directory as CD on (CD.content = C.id)
+ inner join directory_in_revision as DR on (DR.directory = CD.directory)
+ inner join revision as R on (R.id = DR.revision)
+ left join origin as O on (O.id = R.origin)
+ where C.sha1 = content_id)
order by date, revision, origin, path limit early_cut
$$;
@@ -164,22 +164,22 @@
language sql
stable
as $$
- select C_L.sha1 as content,
+ select CL.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
L.path as path
from (
select C.sha1 as sha1,
- unnest(revision) as revision,
- unnest(location) as location
- from content_in_revision as C_R
- inner join content as C on (C.id=C_R.content)
- where C.sha1=content_id
- ) as C_L
- inner join revision as R on (R.id=C_L.revision)
- inner join location as L on (L.id=C_L.location)
- left join origin as O on (R.origin=O.id)
+ unnest(CR.revision) as revision,
+ unnest(CR.location) as location
+ from content_in_revision as CR
+ inner join content as C on (C.id = CR.content)
+ where C.sha1 = content_id
+ ) as CL
+ inner join revision as R on (R.id = CL.revision)
+ inner join location as L on (L.id = CL.location)
+ left join origin as O on (O.id = R.origin)
order by date, revision, origin, path asc limit 1
$$;
@@ -194,55 +194,55 @@
language sql
stable
as $$
- (with cnt as (
- select c.sha1 as sha1,
- unnest(c_r.revision) as revision,
- unnest(c_r.location) as location
- from content_in_revision as c_r
- inner join content as c on (c.id = c_r.content)
- where c.sha1 = content_id
+ (with cntrev as (
+ select C.sha1 as sha1,
+ unnest(CR.revision) as revision,
+ unnest(CR.location) as location
+ from content_in_revision as CR
+ inner join content as C on (C.id = CR.content)
+ where C.sha1 = content_id
)
- select cnt.sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ select CR.sha1 as content,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
- l.path as path
- from cnt
- inner join revision as r on (r.id = cnt.revision)
- inner join location as l on (l.id = cnt.location)
- left join origin as O on (R.origin=O.id)
+ L.path as path
+ from cntrev as CR
+ inner join revision as R on (R.id = CR.revision)
+ inner join location as L on (L.id = CR.location)
+ left join origin as O on (O.id = R.origin)
)
union
- (with cnt as (
- select c.sha1 as content_sha1,
- unnest(cd.directory) as directory,
- unnest(cd.location) as location
- from content as c
- inner join content_in_directory as cd on (cd.content = c.id)
- where c.sha1 = content_id
+ (with cntdir as (
+ select C.sha1 as sha1,
+ unnest(CD.directory) as directory,
+ unnest(CD.location) as location
+ from content as C
+ inner join content_in_directory as CD on (CD.content = C.id)
+ where C.sha1 = content_id
),
- cntdir as (
- select cnt.content_sha1 as content_sha1,
- cntloc.path as file_path,
- unnest(dr.revision) as revision,
- unnest(dr.location) as prefix_location
- from cnt
- inner join directory_in_revision as dr on (dr.directory = cnt.directory)
- inner join location as cntloc on (cntloc.id = cnt.location)
+ cntrev as (
+ select CD.sha1 as sha1,
+ L.path as path,
+ unnest(DR.revision) as revision,
+ unnest(DR.location) as prefix
+ from cntdir as CD
+ inner join directory_in_revision as DR on (DR.directory = CD.directory)
+ inner join location as L on (L.id = CD.location)
)
- select cntdir.content_sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ select CR.sha1 as content,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
- case dirloc.path
- when '' then cntdir.file_path
- when '.' then cntdir.file_path
- else (dirloc.path || '/' || cntdir.file_path)::unix_path
+ case DL.path
+ when '' then CR.path
+ when '.' then CR.path
+ else (DL.path || '/' || CR.path)::unix_path
end as path
- from cntdir
- inner join location as dirloc on (cntdir.prefix_location = dirloc.id)
- inner join revision as r on (cntdir.revision = r.id)
- left join origin as O on (R.origin=O.id)
+ from cntrev as CR
+ inner join revision as R on (R.id = CR.revision)
+ inner join location as DL on (DL.id = CR.prefix)
+ left join origin as O on (O.id = R.origin)
)
order by date, revision, origin, path limit early_cut
$$;
@@ -262,19 +262,19 @@
language sql
stable
as $$
- select C_L.sha1 as content,
+ select CL.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
'\x'::unix_path as path
from (
select C.sha1, unnest(revision) as revision
- from content_in_revision as C_R
- inner join content as C on (C.id=C_R.content)
+ from content_in_revision as CR
+ inner join content as C on (C.id = CR.content)
where C.sha1=content_id
- ) as C_L
- inner join revision as R on (R.id=C_L.revision)
- left join origin as O on (R.origin=O.id)
+ ) as CL
+ inner join revision as R on (R.id = CL.revision)
+ left join origin as O on (O.id = R.origin)
order by date, revision, origin, path asc limit 1
$$;
@@ -290,44 +290,44 @@
language sql
stable
as $$
- (with cnt as (
- select c.sha1 as sha1,
- unnest(c_r.revision) as revision
- from content_in_revision as c_r
- inner join content as c on (c.id = c_r.content)
- where c.sha1 = content_id
+ (with cntrev as (
+ select C.sha1 as sha1,
+ unnest(CR.revision) as revision
+ from content_in_revision as CR
+ inner join content as C on (C.id = CR.content)
+ where C.sha1 = content_id
)
- select cnt.sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ select CR.sha1 as content,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
'\x'::unix_path as path
- from cnt
- inner join revision as r on (r.id = cnt.revision)
- left join origin as O on (r.origin=O.id)
+ from cntrev as CR
+ inner join revision as R on (R.id = CR.revision)
+ left join origin as O on (O.id = R.origin)
)
union
- (with cnt as (
- select c.sha1 as content_sha1,
- unnest(cd.directory) as directory
- from content as c
- inner join content_in_directory as cd on (cd.content = c.id)
- where c.sha1 = content_id
+ (with cntdir as (
+ select C.sha1 as sha1,
+ unnest(CD.directory) as directory
+ from content as C
+ inner join content_in_directory as CD on (CD.content = C.id)
+ where C.sha1 = content_id
),
- cntdir as (
- select cnt.content_sha1 as content_sha1,
- unnest(dr.revision) as revision
- from cnt
- inner join directory_in_revision as dr on (dr.directory = cnt.directory)
+ cntrev as (
+ select CD.sha1 as sha1,
+ unnest(DR.revision) as revision
+ from cntdir as CD
+ inner join directory_in_revision as DR on (DR.directory = CD.directory)
)
- select cntdir.content_sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ select CR.sha1 as content,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
'\x'::unix_path as path
- from cntdir
- inner join revision as r on (cntdir.revision = r.id)
- left join origin as O on (r.origin=O.id)
+ from cntrev as CR
+ inner join revision as R on (R.id = CR.revision)
+ left join origin as O on (O.id = R.origin)
)
order by date, revision, origin, path limit early_cut
$$;
diff --git a/swh/provenance/tests/conftest.py b/swh/provenance/tests/conftest.py
--- a/swh/provenance/tests/conftest.py
+++ b/swh/provenance/tests/conftest.py
@@ -49,7 +49,7 @@
@pytest.fixture
def app(populated_db: Dict[str, str]) -> Iterator[server.ProvenanceStorageServerApp]:
assert hasattr(server, "storage")
- server.storage = get_provenance_storage(cls="local", db=populated_db)
+ server.storage = get_provenance_storage(cls="postgresql", db=populated_db)
yield server.app
@@ -59,7 +59,7 @@
return RemoteProvenanceStorage
-@pytest.fixture(params=["local", "remote"])
+@pytest.fixture(params=["postgresql", "remote"])
def provenance_storage(
request: SubRequest,
populated_db: Dict[str, str],
@@ -94,7 +94,9 @@
)
# in test sessions, we DO want to raise any exception occurring at commit time
return get_provenance(
- cls="local", db=provenance_postgresql.get_dsn_parameters(), raise_on_commit=True
+ cls="postgresql",
+ db=provenance_postgresql.get_dsn_parameters(),
+ raise_on_commit=True,
)
diff --git a/swh/provenance/tests/test_conftest.py b/swh/provenance/tests/test_conftest.py
--- a/swh/provenance/tests/test_conftest.py
+++ b/swh/provenance/tests/test_conftest.py
@@ -9,7 +9,8 @@
def test_provenance_fixture(provenance: ProvenanceInterface) -> None:
- """Check the 'provenance' fixture produce a working ProvenanceDB object"""
+ """Check the 'provenance' fixture produce a working
+ ProvenanceStoragePostgreSql object"""
assert provenance
provenance.flush() # should be a noop
diff --git a/swh/provenance/tests/test_provenance_db.py b/swh/provenance/tests/test_provenance_db.py
--- a/swh/provenance/tests/test_provenance_db.py
+++ b/swh/provenance/tests/test_provenance_db.py
@@ -4,11 +4,11 @@
# See top-level LICENSE file for more information
from swh.provenance.interface import ProvenanceInterface
-from swh.provenance.postgresql.provenancedb import ProvenanceDB
+from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
def test_provenance_flavor(provenance: ProvenanceInterface) -> None:
- if isinstance(provenance.storage, ProvenanceDB):
+ if isinstance(provenance.storage, ProvenanceStoragePostgreSql):
assert provenance.storage.flavor in (
"with-path",
"without-path",

File Metadata

Mime Type
text/plain
Expires
Mar 17 2025, 6:25 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223834

Event Timeline