Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7342972
D6084.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
19 KB
Subscribers
None
D6084.id.diff
View Options
diff --git a/docs/index.rst b/docs/index.rst
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -66,7 +66,7 @@
# config.yaml
provenance:
storage:
- cls: local
+ cls: postgresql
db:
host: /tmp/tmpifn2ov_j
port: 9824
diff --git a/swh/provenance/__init__.py b/swh/provenance/__init__.py
--- a/swh/provenance/__init__.py
+++ b/swh/provenance/__init__.py
@@ -6,6 +6,7 @@
from __future__ import annotations
from typing import TYPE_CHECKING
+import warnings
if TYPE_CHECKING:
from .archive import ArchiveInterface
@@ -70,14 +71,21 @@
Raises:
:cls:`ValueError` if passed an unknown archive class.
"""
- if cls == "local":
+ if cls in ["local", "postgresql"]:
from swh.core.db import BaseDb
- from .postgresql.provenancedb import ProvenanceDB
+ from .postgresql.provenance import ProvenanceStoragePostgreSql
+
+ if cls == "local":
+ warnings.warn(
+ '"local" class is deprecated for provenance storage, please '
+ 'use "postgresql" class instead.',
+ DeprecationWarning,
+ )
conn = BaseDb.connect(**kwargs["db"]).conn
raise_on_commit = kwargs.get("raise_on_commit", False)
- return ProvenanceDB(conn, raise_on_commit)
+ return ProvenanceStoragePostgreSql(conn, raise_on_commit)
elif cls == "remote":
from .api.client import RemoteProvenanceStorage
diff --git a/swh/provenance/api/server.py b/swh/provenance/api/server.py
--- a/swh/provenance/api/server.py
+++ b/swh/provenance/api/server.py
@@ -114,9 +114,9 @@
if type == "local":
cls = scfg.get("cls")
- if cls != "local":
+ if cls != "postgresql":
raise ValueError(
- "The provenance backend can only be started with a 'local' "
+ "The provenance backend can only be started with a 'postgresql' "
"configuration"
)
diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py
--- a/swh/provenance/cli.py
+++ b/swh/provenance/cli.py
@@ -39,7 +39,7 @@
},
},
"storage": {
- "cls": "local",
+ "cls": "postgresql",
"db": {"host": "localhost", "dbname": "provenance"},
},
}
diff --git a/swh/provenance/postgresql/provenancedb.py b/swh/provenance/postgresql/provenance.py
rename from swh/provenance/postgresql/provenancedb.py
rename to swh/provenance/postgresql/provenance.py
--- a/swh/provenance/postgresql/provenancedb.py
+++ b/swh/provenance/postgresql/provenance.py
@@ -24,7 +24,7 @@
)
-class ProvenanceDB:
+class ProvenanceStoragePostgreSql:
def __init__(
self, conn: psycopg2.extensions.connection, raise_on_commit: bool = False
) -> None:
@@ -98,6 +98,7 @@
urls: Dict[Sha1Git, str] = {}
sha1s = tuple(ids)
if sha1s:
+ # TODO: consider splitting this query in several ones if sha1s is too big!
values = ", ".join(itertools.repeat("%s", len(sha1s)))
sql = f"""
SELECT sha1, url
@@ -151,6 +152,7 @@
result: Dict[Sha1Git, RevisionData] = {}
sha1s = tuple(ids)
if sha1s:
+ # TODO: consider splitting this query in several ones if sha1s is too big!
values = ", ".join(itertools.repeat("%s", len(sha1s)))
sql = f"""
SELECT sha1, date, origin
@@ -230,10 +232,11 @@
sql_l.append(
f"""ON CONFLICT ({src}) DO UPDATE
SET {dst}=ARRAY(
- SELECT UNNEST({table}.{dst} || excluded.{dst})),
- location=ARRAY(
- SELECT UNNEST({relation.value}.location || excluded.location))
- """
+ SELECT UNNEST({table}.{dst} || EXCLUDED.{dst})
+ ), location=ARRAY(
+ SELECT UNNEST({relation.value}.location || EXCLUDED.location)
+ )
+ """
)
else:
sql_l.append("ON CONFLICT DO NOTHING")
@@ -263,6 +266,7 @@
dates: Dict[Sha1Git, datetime] = {}
sha1s = tuple(ids)
if sha1s:
+ # TODO: consider splitting this query in several ones if sha1s is too big!
values = ", ".join(itertools.repeat("%s", len(sha1s)))
sql = f"""
SELECT sha1, date
diff --git a/swh/provenance/sql/40-funcs.sql b/swh/provenance/sql/40-funcs.sql
--- a/swh/provenance/sql/40-funcs.sql
+++ b/swh/provenance/sql/40-funcs.sql
@@ -28,10 +28,10 @@
L.path as path
from content as C
inner join content_in_revision as CR on (CR.content = C.id)
- inner join location as L on (CR.location = L.id)
- inner join revision as R on (CR.revision = R.id)
- left join origin as O on (R.origin=O.id)
- where C.sha1=content_id
+ inner join location as L on (L.id = CR.location)
+ inner join revision as R on (R.id = CR.revision)
+ left join origin as O on (O.id = R.origin)
+ where C.sha1 = content_id
order by date, revision, origin, path asc limit 1
$$;
@@ -47,34 +47,34 @@
stable
as $$
(select C.sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
- l.path as path
- from content as c
- inner join content_in_revision as cr on (cr.content = c.id)
- inner join location as l on (cr.location = l.id)
- inner join revision as r on (cr.revision = r.id)
- left join origin AS O on (R.origin=O.id)
- where c.sha1=content_id)
+ L.path as path
+ from content as C
+ inner join content_in_revision as CR on (CR.content = C.id)
+ inner join location as L on (L.id = CR.location)
+ inner join revision as R on (R.id = CR.revision)
+ left join origin as O on (O.id = R.origin)
+ where C.sha1 = content_id)
union
- (select c.sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ (select C.sha1 as content,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
- case dirloc.path
- when '' then cntloc.path
- when '.' then cntloc.path
- else (dirloc.path || '/' || cntloc.path)::unix_path
+ case DL.path
+ when '' then CL.path
+ when '.' then CL.path
+ else (DL.path || '/' || CL.path)::unix_path
end as path
- from content as c
- inner join content_in_directory as cd on (c.id = cd.content)
- inner join directory_in_revision as dr on (cd.directory = dr.directory)
- inner join revision as r on (dr.revision = r.id)
- inner join location as cntloc on (cd.location = cntloc.id)
- inner join location as dirloc on (dr.location = dirloc.id)
- left join origin as O on (R.origin=O.id)
- where C.sha1=content_id)
+ from content as C
+ inner join content_in_directory as CD on (CD.content = C.id)
+ inner join directory_in_revision as DR on (DR.directory = CD.directory)
+ inner join revision as R on (R.id = DR.revision)
+ inner join location as CL on (CL.id = CD.location)
+ inner join location as DL on (DL.id = DR.location)
+ left join origin as O on (O.id = R.origin)
+ where C.sha1 = content_id)
order by date, revision, origin, path limit early_cut
$$;
@@ -100,9 +100,9 @@
'\x'::unix_path as path
from content as C
inner join content_in_revision as CR on (CR.content = C.id)
- inner join revision as R on (CR.revision = R.id)
- left join origin as O on (R.origin=O.id)
- where C.sha1=content_id
+ inner join revision as R on (R.id = CR.revision)
+ left join origin as O on (O.id = R.origin)
+ where C.sha1 = content_id
order by date, revision, origin asc limit 1
$$;
@@ -118,27 +118,27 @@
stable
as $$
(select C.sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
'\x'::unix_path as path
- from content as c
- inner join content_in_revision as cr on (cr.content = c.id)
- inner join revision as r on (cr.revision = r.id)
- left join origin as O on (R.origin=O.id)
- where c.sha1=content_id)
+ from content as C
+ inner join content_in_revision as CR on (CR.content = C.id)
+ inner join revision as R on (R.id = CR.revision)
+ left join origin as O on (O.id = R.origin)
+ where C.sha1 = content_id)
union
- (select c.sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ (select C.sha1 as content,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
'\x'::unix_path as path
- from content as c
- inner join content_in_directory as cd on (c.id = cd.content)
- inner join directory_in_revision as dr on (cd.directory = dr.directory)
- inner join revision as r on (dr.revision = r.id)
- left join origin as O on (R.origin=O.id)
- where C.sha1=content_id)
+ from content as C
+ inner join content_in_directory as CD on (CD.content = C.id)
+ inner join directory_in_revision as DR on (DR.directory = CD.directory)
+ inner join revision as R on (R.id = DR.revision)
+ left join origin as O on (O.id = R.origin)
+ where C.sha1 = content_id)
order by date, revision, origin, path limit early_cut
$$;
@@ -164,22 +164,22 @@
language sql
stable
as $$
- select C_L.sha1 as content,
+ select CL.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
L.path as path
from (
select C.sha1 as sha1,
- unnest(revision) as revision,
- unnest(location) as location
- from content_in_revision as C_R
- inner join content as C on (C.id=C_R.content)
- where C.sha1=content_id
- ) as C_L
- inner join revision as R on (R.id=C_L.revision)
- inner join location as L on (L.id=C_L.location)
- left join origin as O on (R.origin=O.id)
+ unnest(CR.revision) as revision,
+ unnest(CR.location) as location
+ from content_in_revision as CR
+ inner join content as C on (C.id = CR.content)
+ where C.sha1 = content_id
+ ) as CL
+ inner join revision as R on (R.id = CL.revision)
+ inner join location as L on (L.id = CL.location)
+ left join origin as O on (O.id = R.origin)
order by date, revision, origin, path asc limit 1
$$;
@@ -194,55 +194,55 @@
language sql
stable
as $$
- (with cnt as (
- select c.sha1 as sha1,
- unnest(c_r.revision) as revision,
- unnest(c_r.location) as location
- from content_in_revision as c_r
- inner join content as c on (c.id = c_r.content)
- where c.sha1 = content_id
+ (with cntrev as (
+ select C.sha1 as sha1,
+ unnest(CR.revision) as revision,
+ unnest(CR.location) as location
+ from content_in_revision as CR
+ inner join content as C on (C.id = CR.content)
+ where C.sha1 = content_id
)
- select cnt.sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ select CR.sha1 as content,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
- l.path as path
- from cnt
- inner join revision as r on (r.id = cnt.revision)
- inner join location as l on (l.id = cnt.location)
- left join origin as O on (R.origin=O.id)
+ L.path as path
+ from cntrev as CR
+ inner join revision as R on (R.id = CR.revision)
+ inner join location as L on (L.id = CR.location)
+ left join origin as O on (O.id = R.origin)
)
union
- (with cnt as (
- select c.sha1 as content_sha1,
- unnest(cd.directory) as directory,
- unnest(cd.location) as location
- from content as c
- inner join content_in_directory as cd on (cd.content = c.id)
- where c.sha1 = content_id
+ (with cntdir as (
+ select C.sha1 as sha1,
+ unnest(CD.directory) as directory,
+ unnest(CD.location) as location
+ from content as C
+ inner join content_in_directory as CD on (CD.content = C.id)
+ where C.sha1 = content_id
),
- cntdir as (
- select cnt.content_sha1 as content_sha1,
- cntloc.path as file_path,
- unnest(dr.revision) as revision,
- unnest(dr.location) as prefix_location
- from cnt
- inner join directory_in_revision as dr on (dr.directory = cnt.directory)
- inner join location as cntloc on (cntloc.id = cnt.location)
+ cntrev as (
+ select CD.sha1 as sha1,
+ L.path as path,
+ unnest(DR.revision) as revision,
+ unnest(DR.location) as prefix
+ from cntdir as CD
+ inner join directory_in_revision as DR on (DR.directory = CD.directory)
+ inner join location as L on (L.id = CD.location)
)
- select cntdir.content_sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ select CR.sha1 as content,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
- case dirloc.path
- when '' then cntdir.file_path
- when '.' then cntdir.file_path
- else (dirloc.path || '/' || cntdir.file_path)::unix_path
+ case DL.path
+ when '' then CR.path
+ when '.' then CR.path
+ else (DL.path || '/' || CR.path)::unix_path
end as path
- from cntdir
- inner join location as dirloc on (cntdir.prefix_location = dirloc.id)
- inner join revision as r on (cntdir.revision = r.id)
- left join origin as O on (R.origin=O.id)
+ from cntrev as CR
+ inner join revision as R on (R.id = CR.revision)
+ inner join location as DL on (DL.id = CR.prefix)
+ left join origin as O on (O.id = R.origin)
)
order by date, revision, origin, path limit early_cut
$$;
@@ -262,19 +262,19 @@
language sql
stable
as $$
- select C_L.sha1 as content,
+ select CL.sha1 as content,
R.sha1 as revision,
R.date as date,
O.url as origin,
'\x'::unix_path as path
from (
select C.sha1, unnest(revision) as revision
- from content_in_revision as C_R
- inner join content as C on (C.id=C_R.content)
+ from content_in_revision as CR
+ inner join content as C on (C.id = CR.content)
where C.sha1=content_id
- ) as C_L
- inner join revision as R on (R.id=C_L.revision)
- left join origin as O on (R.origin=O.id)
+ ) as CL
+ inner join revision as R on (R.id = CL.revision)
+ left join origin as O on (O.id = R.origin)
order by date, revision, origin, path asc limit 1
$$;
@@ -290,44 +290,44 @@
language sql
stable
as $$
- (with cnt as (
- select c.sha1 as sha1,
- unnest(c_r.revision) as revision
- from content_in_revision as c_r
- inner join content as c on (c.id = c_r.content)
- where c.sha1 = content_id
+ (with cntrev as (
+ select C.sha1 as sha1,
+ unnest(CR.revision) as revision
+ from content_in_revision as CR
+ inner join content as C on (C.id = CR.content)
+ where C.sha1 = content_id
)
- select cnt.sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ select CR.sha1 as content,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
'\x'::unix_path as path
- from cnt
- inner join revision as r on (r.id = cnt.revision)
- left join origin as O on (r.origin=O.id)
+ from cntrev as CR
+ inner join revision as R on (R.id = CR.revision)
+ left join origin as O on (O.id = R.origin)
)
union
- (with cnt as (
- select c.sha1 as content_sha1,
- unnest(cd.directory) as directory
- from content as c
- inner join content_in_directory as cd on (cd.content = c.id)
- where c.sha1 = content_id
+ (with cntdir as (
+ select C.sha1 as sha1,
+ unnest(CD.directory) as directory
+ from content as C
+ inner join content_in_directory as CD on (CD.content = C.id)
+ where C.sha1 = content_id
),
- cntdir as (
- select cnt.content_sha1 as content_sha1,
- unnest(dr.revision) as revision
- from cnt
- inner join directory_in_revision as dr on (dr.directory = cnt.directory)
+ cntrev as (
+ select CD.sha1 as sha1,
+ unnest(DR.revision) as revision
+ from cntdir as CD
+ inner join directory_in_revision as DR on (DR.directory = CD.directory)
)
- select cntdir.content_sha1 as content,
- r.sha1 as revision,
- r.date as date,
+ select CR.sha1 as content,
+ R.sha1 as revision,
+ R.date as date,
O.url as origin,
'\x'::unix_path as path
- from cntdir
- inner join revision as r on (cntdir.revision = r.id)
- left join origin as O on (r.origin=O.id)
+ from cntrev as CR
+ inner join revision as R on (R.id = CR.revision)
+ left join origin as O on (O.id = R.origin)
)
order by date, revision, origin, path limit early_cut
$$;
diff --git a/swh/provenance/tests/conftest.py b/swh/provenance/tests/conftest.py
--- a/swh/provenance/tests/conftest.py
+++ b/swh/provenance/tests/conftest.py
@@ -49,7 +49,7 @@
@pytest.fixture
def app(populated_db: Dict[str, str]) -> Iterator[server.ProvenanceStorageServerApp]:
assert hasattr(server, "storage")
- server.storage = get_provenance_storage(cls="local", db=populated_db)
+ server.storage = get_provenance_storage(cls="postgresql", db=populated_db)
yield server.app
@@ -59,7 +59,7 @@
return RemoteProvenanceStorage
-@pytest.fixture(params=["local", "remote"])
+@pytest.fixture(params=["postgresql", "remote"])
def provenance_storage(
request: SubRequest,
populated_db: Dict[str, str],
@@ -94,7 +94,9 @@
)
# in test sessions, we DO want to raise any exception occurring at commit time
return get_provenance(
- cls="local", db=provenance_postgresql.get_dsn_parameters(), raise_on_commit=True
+ cls="postgresql",
+ db=provenance_postgresql.get_dsn_parameters(),
+ raise_on_commit=True,
)
diff --git a/swh/provenance/tests/test_conftest.py b/swh/provenance/tests/test_conftest.py
--- a/swh/provenance/tests/test_conftest.py
+++ b/swh/provenance/tests/test_conftest.py
@@ -9,7 +9,8 @@
def test_provenance_fixture(provenance: ProvenanceInterface) -> None:
- """Check the 'provenance' fixture produce a working ProvenanceDB object"""
+ """Check the 'provenance' fixture produce a working
+ ProvenanceStoragePostgreSql object"""
assert provenance
provenance.flush() # should be a noop
diff --git a/swh/provenance/tests/test_provenance_db.py b/swh/provenance/tests/test_provenance_db.py
--- a/swh/provenance/tests/test_provenance_db.py
+++ b/swh/provenance/tests/test_provenance_db.py
@@ -4,11 +4,11 @@
# See top-level LICENSE file for more information
from swh.provenance.interface import ProvenanceInterface
-from swh.provenance.postgresql.provenancedb import ProvenanceDB
+from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
def test_provenance_flavor(provenance: ProvenanceInterface) -> None:
- if isinstance(provenance.storage, ProvenanceDB):
+ if isinstance(provenance.storage, ProvenanceStoragePostgreSql):
assert provenance.storage.flavor in (
"with-path",
"without-path",
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mar 17 2025, 6:25 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223834
Attached To
D6084: Rename PostgreSQL backend and code styling
Event Timeline
Log In to Comment