diff --git a/swh/provenance/sql/15-flavor.sql b/swh/provenance/sql/15-flavor.sql --- a/swh/provenance/sql/15-flavor.sql +++ b/swh/provenance/sql/15-flavor.sql @@ -1,9 +1,7 @@ -- database flavor create type database_flavor as enum ( - 'with-path', - 'without-path', - 'with-path-denormalized', - 'without-path-denormalized' + 'normalized', + 'denormalized' ); comment on type database_flavor is 'Flavor of the current database'; @@ -17,7 +15,7 @@ comment on column dbflavor.single_row is 'Bogus column to force the table to have a single row'; create or replace function swh_get_dbflavor() returns database_flavor language sql stable as $$ - select coalesce((select flavor from dbflavor), 'with-path'); + select coalesce((select flavor from dbflavor), 'normalized'); $$; comment on function swh_get_dbflavor is 'Get the flavor of the database currently deployed'; diff --git a/swh/provenance/sql/30-schema.sql b/swh/provenance/sql/30-schema.sql --- a/swh/provenance/sql/30-schema.sql +++ b/swh/provenance/sql/30-schema.sql @@ -1,6 +1,5 @@ -- psql variables to get the current database flavor select position('denormalized' in swh_get_dbflavor()::text) = 0 as dbflavor_norm \gset -select position('without-path' in swh_get_dbflavor()::text) = 0 as dbflavor_with_path \gset -- a Git object ID, i.e., a Git-style salted SHA1 checksum create domain sha1_git as bytea check (length(value) = 20); diff --git a/swh/provenance/sql/40-funcs.sql b/swh/provenance/sql/40-funcs.sql --- a/swh/provenance/sql/40-funcs.sql +++ b/swh/provenance/sql/40-funcs.sql @@ -1,6 +1,5 @@ -- psql variables to get the current database flavor select position('denormalized' in swh_get_dbflavor()::text) = 0 as dbflavor_norm \gset -select position('with-path' in swh_get_dbflavor()::text) != 0 as dbflavor_with_path \gset create or replace function swh_mktemp_relation_add() returns void language sql @@ -14,9 +13,8 @@ \if :dbflavor_norm -\if :dbflavor_with_path -- --- with path and normalized +-- normalized -- create or 
replace function swh_provenance_content_find_first(content_id sha1_git) @@ -174,143 +172,11 @@ end; $$; -\else --- --- without path and normalized --- -create or replace function swh_provenance_content_find_first(content_id sha1_git) - returns table ( - content sha1_git, - revision sha1_git, - date timestamptz, - origin text, - path unix_path - ) - language sql - stable -as $$ - select C.sha1 as content, - R.sha1 as revision, - R.date as date, - O.url as origin, - '\x'::unix_path as path - from content as C - inner join content_in_revision as CR on (CR.content = C.id) - inner join revision as R on (R.id = CR.revision) - left join origin as O on (O.id = R.origin) - where C.sha1 = content_id - order by date, revision, origin asc limit 1 -$$; - -create or replace function swh_provenance_content_find_all(content_id sha1_git, early_cut int) - returns table ( - content sha1_git, - revision sha1_git, - date timestamptz, - origin text, - path unix_path - ) - language sql - stable -as $$ - (select C.sha1 as content, - R.sha1 as revision, - R.date as date, - O.url as origin, - '\x'::unix_path as path - from content as C - inner join content_in_revision as CR on (CR.content = C.id) - inner join revision as R on (R.id = CR.revision) - left join origin as O on (O.id = R.origin) - where C.sha1 = content_id) - union - (select C.sha1 as content, - R.sha1 as revision, - R.date as date, - O.url as origin, - '\x'::unix_path as path - from content as C - inner join content_in_directory as CD on (CD.content = C.id) - inner join directory_in_revision as DR on (DR.directory = CD.directory) - inner join revision as R on (R.id = DR.revision) - left join origin as O on (O.id = R.origin) - where C.sha1 = content_id) - order by date, revision, origin, path limit early_cut -$$; - -create or replace function swh_provenance_relation_add_from_temp( - rel_table regclass, src_table regclass, dst_table regclass -) - returns void - language plpgsql - volatile -as $$ - begin - execute format( - 
'insert into %s - select S.id, D.id - from tmp_relation_add as V - inner join %s as S on (S.sha1 = V.src) - inner join %s as D on (D.sha1 = V.dst) - on conflict do nothing', - rel_table, src_table, dst_table - ); - end; -$$; - -create or replace function swh_provenance_relation_get( - rel_table regclass, src_table regclass, dst_table regclass, filter rel_flt, sha1s sha1_git[] -) - returns table ( - src sha1_git, - dst sha1_git, - path unix_path - ) - language plpgsql - stable -as $$ - declare - src_field text; - dst_field text; - filter_result text; - begin - if rel_table = 'revision_before_revision'::regclass then - src_field := 'prev'; - dst_field := 'next'; - else - src_field := src_table::text; - dst_field := dst_table::text; - end if; - - case filter - when 'filter-src'::rel_flt then - filter_result := 'where S.sha1 = any($1)'; - when 'filter-dst'::rel_flt then - filter_result := 'where D.sha1 = any($1)'; - else - filter_result := ''; - end case; - - return query execute format( - 'select S.sha1 as src, D.sha1 as dst, NULL::unix_path as path - from %s as R - inner join %s as S on (S.id = R.' || src_field || ') - inner join %s as D on (D.id = R.' 
|| dst_field || ') - ' || filter_result, - rel_table, src_table, dst_table - ) using sha1s; - end; -$$; - --- :dbflavor_with_path -\endif - -- :dbflavor_norm \else -\if :dbflavor_with_path -- --- with path and denormalized +-- denormalized -- create or replace function swh_provenance_content_find_first(content_id sha1_git) @@ -522,188 +388,5 @@ end; $$; -\else --- --- without path and denormalized --- -create or replace function swh_provenance_content_find_first(content_id sha1_git) - returns table ( - content sha1_git, - revision sha1_git, - date timestamptz, - origin text, - path unix_path - ) - language sql - stable -as $$ - select CL.sha1 as content, - R.sha1 as revision, - R.date as date, - O.url as origin, - '\x'::unix_path as path - from ( - select C.sha1, unnest(revision) as revision - from content_in_revision as CR - inner join content as C on (C.id = CR.content) - where C.sha1=content_id - ) as CL - inner join revision as R on (R.id = CL.revision) - left join origin as O on (O.id = R.origin) - order by date, revision, origin, path asc limit 1 -$$; - -create or replace function swh_provenance_content_find_all(content_id sha1_git, early_cut int) - returns table ( - content sha1_git, - revision sha1_git, - date timestamptz, - origin text, - path unix_path - ) - language sql - stable -as $$ - (with - cntrev as ( - select C.sha1 as sha1, - unnest(CR.revision) as revision - from content_in_revision as CR - inner join content as C on (C.id = CR.content) - where C.sha1 = content_id) - select CR.sha1 as content, - R.sha1 as revision, - R.date as date, - O.url as origin, - '\x'::unix_path as path - from cntrev as CR - inner join revision as R on (R.id = CR.revision) - left join origin as O on (O.id = R.origin)) - union - (with - cntdir as ( - select C.sha1 as sha1, - unnest(CD.directory) as directory - from content as C - inner join content_in_directory as CD on (CD.content = C.id) - where C.sha1 = content_id), - cntrev as ( - select CD.sha1 as sha1, - 
unnest(DR.revision) as revision - from cntdir as CD - inner join directory_in_revision as DR on (DR.directory = CD.directory)) - select CR.sha1 as content, - R.sha1 as revision, - R.date as date, - O.url as origin, - '\x'::unix_path as path - from cntrev as CR - inner join revision as R on (R.id = CR.revision) - left join origin as O on (O.id = R.origin)) - order by date, revision, origin, path limit early_cut -$$; - -create or replace function swh_provenance_relation_add_from_temp( - rel_table regclass, src_table regclass, dst_table regclass -) - returns void - language plpgsql - volatile -as $$ - declare - select_fields text; - group_entries text; - on_conflict text; - begin - if src_table in ('content'::regclass, 'directory'::regclass) then - select_fields := 'array_agg(D.id)'; - group_entries := 'group by S.id'; - on_conflict := format(' - (%s) do update - set %s=array( - select distinct unnest( - %s.' || dst_table::text || ' || excluded.' || dst_table::text || ' - ) - )', - src_table, dst_table, rel_table, rel_table - ); - else - select_fields := 'D.id'; - group_entries := ''; - on_conflict := 'do nothing'; - end if; - - execute format( - 'insert into %s - select S.id, ' || select_fields || ' - from tmp_relation_add as V - inner join %s as S on (S.sha1 = V.src) - inner join %s as D on (D.sha1 = V.dst) - ' || group_entries || ' - on conflict ' || on_conflict, - rel_table, src_table, dst_table - ); - end; -$$; - -create or replace function swh_provenance_relation_get( - rel_table regclass, src_table regclass, dst_table regclass, filter rel_flt, sha1s sha1_git[] -) - returns table ( - src sha1_git, - dst sha1_git, - path unix_path - ) - language plpgsql - stable -as $$ - declare - src_field text; - dst_field text; - proj_unnested text; - filter_inner_result text; - filter_outer_result text; - begin - if rel_table = 'revision_before_revision'::regclass then - src_field := 'prev'; - dst_field := 'next'; - else - src_field := src_table::text; - dst_field := 
dst_table::text; - end if; - - if src_table in ('content'::regclass, 'directory'::regclass) then - proj_unnested := 'unnest(R.' || dst_field || ') as dst'; - else - proj_unnested := 'R.' || dst_field || ' as dst'; - end if; - - case filter - when 'filter-src'::rel_flt then - filter_inner_result := 'where S.sha1 = any($1)'; - filter_outer_result := ''; - when 'filter-dst'::rel_flt then - filter_inner_result := ''; - filter_outer_result := 'where D.sha1 = any($1)'; - else - filter_inner_result := ''; - filter_outer_result := ''; - end case; - - return query execute format( - 'select CL.src, D.sha1 as dst, NULL::unix_path as path - from (select S.sha1 as src, ' || proj_unnested || ' - from %s as R - inner join %s as S on (S.id = R.' || src_field || ') - ' || filter_inner_result || ') as CL - inner join %s as D on (D.id = CL.dst) - ' || filter_outer_result, - rel_table, src_table, dst_table - ) using sha1s; - end; -$$; - -\endif --- :dbflavor_with_path \endif -- :dbflavor_norm diff --git a/swh/provenance/sql/upgrades/004.sql b/swh/provenance/sql/upgrades/004.sql new file mode 100644 --- /dev/null +++ b/swh/provenance/sql/upgrades/004.sql @@ -0,0 +1,26 @@ +-- SWH provenance DB schema upgrade +-- from_version: 3 +-- to_version: 4 +-- description: rename db flavor (without-path having been removed) +-- will fail if the db is using a without-path flavor.
+alter type database_flavor rename value 'with-path' to 'normalized'; +alter type database_flavor rename value 'with-path-denormalized' to 'denormalized'; + +alter type database_flavor rename to database_flavor_old; + +create type database_flavor as enum ( + 'normalized', + 'denormalized' +); +comment on type database_flavor is 'Flavor of the current database'; + +drop function swh_get_dbflavor; + +alter table dbflavor + alter column flavor type database_flavor using flavor::text::database_flavor; + +create function swh_get_dbflavor() returns database_flavor language sql stable as $$ + select coalesce((select flavor from dbflavor), 'normalized'); +$$; + +drop type database_flavor_old; diff --git a/swh/provenance/storage/interface.py b/swh/provenance/storage/interface.py --- a/swh/provenance/storage/interface.py +++ b/swh/provenance/storage/interface.py @@ -223,7 +223,3 @@ provenance model. This method is used only in tests. """ ... - - @remote_api_endpoint("with_path") - def with_path(self) -> bool: - ... 
diff --git a/swh/provenance/storage/journal.py b/swh/provenance/storage/journal.py --- a/swh/provenance/storage/journal.py +++ b/swh/provenance/storage/journal.py @@ -147,6 +147,3 @@ self, relation: RelationType ) -> Dict[Sha1Git, Set[RelationData]]: return self.storage.relation_get_all(relation) - - def with_path(self) -> bool: - return self.storage.with_path() diff --git a/swh/provenance/storage/postgresql.py b/swh/provenance/storage/postgresql.py --- a/swh/provenance/storage/postgresql.py +++ b/swh/provenance/storage/postgresql.py @@ -51,7 +51,7 @@ class ProvenanceStoragePostgreSql: - current_version = 3 + current_version = 4 def __init__( self, page_size: Optional[int] = None, raise_on_commit: bool = False, **kwargs @@ -221,18 +221,17 @@ @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "location_add"}) @handle_raise_on_commit def location_add(self, paths: Dict[Sha1Git, bytes]) -> bool: - if self.with_path(): - values = [(path,) for path in paths.values()] - if values: - sql = """ - INSERT INTO location(path) VALUES %s - ON CONFLICT DO NOTHING - """ - page_size = self.page_size or len(values) - with self.transaction() as cursor: - psycopg2.extras.execute_values( - cursor, sql, argslist=values, page_size=page_size - ) + values = [(path,) for path in paths.values()] + if values: + sql = """ + INSERT INTO location(path) VALUES %s + ON CONFLICT DO NOTHING + """ + page_size = self.page_size or len(values) + with self.transaction() as cursor: + psycopg2.extras.execute_values( + cursor, sql, argslist=values, page_size=page_size + ) return True @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "location_get_all"}) @@ -392,7 +391,3 @@ src = row.pop("src") result.setdefault(src, set()).add(RelationData(**row)) return result - - @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "with_path"}) - def with_path(self) -> bool: - return "with-path" in self.flavor diff --git a/swh/provenance/tests/conftest.py b/swh/provenance/tests/conftest.py 
--- a/swh/provenance/tests/conftest.py +++ b/swh/provenance/tests/conftest.py @@ -37,7 +37,7 @@ partial( initialize_database_for_module, modname="provenance", - flavor="with-path", + flavor="normalized", version=ProvenanceStoragePostgreSql.current_version, ) ], @@ -78,7 +78,7 @@ init_admin_extensions("swh.provenance", postgres_provenance.dsn) populate_database_for_package( - "swh.provenance", postgres_provenance.dsn, flavor="with-path" + "swh.provenance", postgres_provenance.dsn, flavor="normalized" ) # in test sessions, we DO want to raise any exception occurring at commit time with get_provenance( diff --git a/swh/provenance/tests/test_cli.py b/swh/provenance/tests/test_cli.py --- a/swh/provenance/tests/test_cli.py +++ b/swh/provenance/tests/test_cli.py @@ -3,7 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import Dict, List, Set +from typing import Dict, List from _pytest.monkeypatch import MonkeyPatch from click.testing import CliRunner @@ -56,18 +56,13 @@ } -@pytest.mark.parametrize( - "flavor, dbtables", (("with-path", TABLES), ("without-path", TABLES)) -) +@pytest.mark.parametrize("flavor", ("normalized", "denormalized")) def test_cli_db_create_and_init_db_with_flavor( monkeypatch: MonkeyPatch, postgresql: psycopg2.extensions.connection, flavor: str, - dbtables: Set[str], ) -> None: - """Test that 'swh db init provenance' works with flavors - - for both with-path and without-path flavors""" + """Test that 'swh db init provenance' works with flavors""" dbname = f"{flavor}-db" @@ -99,11 +94,11 @@ f"and table_catalog = '{dbname}'" ) tables = set(x for (x,) in cur.fetchall()) - assert tables == dbtables + assert tables == TABLES def test_cli_init_db_default_flavor(postgresql: psycopg2.extensions.connection) -> None: - "Test that 'swh db init provenance' defaults to a with-path flavored DB" + "Test that 'swh db init provenance' defaults to a normalized flavored DB" 
dbname = postgresql.dsn init_admin_extensions("swh.provenance", dbname) @@ -112,7 +107,7 @@ with postgresql.cursor() as cur: cur.execute("select swh_get_dbflavor()") - assert cur.fetchone() == ("with-path",) + assert cur.fetchone() == ("normalized",) @pytest.mark.origin_layer diff --git a/swh/provenance/tests/test_provenance_db.py b/swh/provenance/tests/test_provenance_db.py --- a/swh/provenance/tests/test_provenance_db.py +++ b/swh/provenance/tests/test_provenance_db.py @@ -10,8 +10,6 @@ def test_provenance_flavor(provenance: ProvenanceInterface) -> None: if isinstance(provenance.storage, ProvenanceStoragePostgreSql): assert provenance.storage.flavor in ( - "with-path", - "without-path", - "with-path-denormalized", - "without-path-denormalized", + "normalized", + "denormalized", ) diff --git a/swh/provenance/tests/test_provenance_storage.py b/swh/provenance/tests/test_provenance_storage.py --- a/swh/provenance/tests/test_provenance_storage.py +++ b/swh/provenance/tests/test_provenance_storage.py @@ -106,11 +106,7 @@ for entry in dir["entries"] } assert provenance_storage.location_add(paths) - - if provenance_storage.with_path(): - assert provenance_storage.location_get_all() == paths - else: - assert not provenance_storage.location_get_all() + assert provenance_storage.location_get_all() == paths @pytest.mark.origin_layer def test_provenance_storage_origin( @@ -272,18 +268,6 @@ # Test content_find_first and content_find_all, first only executing the # revision-content algorithm, then adding the origin-revision layer. - def adapt_result( - result: Optional[ProvenanceResult], with_path: bool - ) -> Optional[ProvenanceResult]: - if result is not None: - return ProvenanceResult( - result.content, - result.revision, - result.date, - result.origin, - result.path if with_path else b"", - ) - return result # Execute the revision-content algorithm on both storages. 
revisions = [ @@ -293,28 +277,23 @@ revision_add(provenance, archive, revisions) revision_add(Provenance(provenance_storage), archive, revisions) - assert adapt_result( - ProvenanceResult( - content=hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494"), - revision=hash_to_bytes("c0d8929936631ecbcf9147be6b8aa13b13b014e4"), - date=datetime.fromtimestamp(1000000000.0, timezone.utc), - origin=None, - path=b"A/B/C/a", - ), - provenance_storage.with_path(), + assert ProvenanceResult( + content=hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494"), + revision=hash_to_bytes("c0d8929936631ecbcf9147be6b8aa13b13b014e4"), + date=datetime.fromtimestamp(1000000000.0, timezone.utc), + origin=None, + path=b"A/B/C/a", ) == provenance_storage.content_find_first( hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494") ) for cnt in {cnt["sha1_git"] for cnt in data["content"]}: - assert adapt_result( - provenance.storage.content_find_first(cnt), - provenance_storage.with_path(), + assert provenance.storage.content_find_first( + cnt ) == provenance_storage.content_find_first(cnt) - assert { - adapt_result(occur, provenance_storage.with_path()) - for occur in provenance.storage.content_find_all(cnt) - } == set(provenance_storage.content_find_all(cnt)) + assert set(provenance.storage.content_find_all(cnt)) == set( + provenance_storage.content_find_all(cnt) + ) @pytest.mark.origin_layer def test_provenance_storage_find_origin_layer( @@ -341,18 +320,6 @@ # Test content_find_first and content_find_all, first only executing the # revision-content algorithm, then adding the origin-revision layer. - def adapt_result( - result: Optional[ProvenanceResult], with_path: bool - ) -> Optional[ProvenanceResult]: - if result is not None: - return ProvenanceResult( - result.content, - result.revision, - result.date, - result.origin, - result.path if with_path else b"", - ) - return result # Execute the origin-revision algorithm on both storages. 
origins = [ @@ -363,28 +330,23 @@ origin_add(provenance, archive, origins) origin_add(Provenance(provenance_storage), archive, origins) - assert adapt_result( - ProvenanceResult( - content=hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494"), - revision=hash_to_bytes("c0d8929936631ecbcf9147be6b8aa13b13b014e4"), - date=datetime.fromtimestamp(1000000000.0, timezone.utc), - origin="https://cmdbts2", - path=b"A/B/C/a", - ), - provenance_storage.with_path(), + assert ProvenanceResult( + content=hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494"), + revision=hash_to_bytes("c0d8929936631ecbcf9147be6b8aa13b13b014e4"), + date=datetime.fromtimestamp(1000000000.0, timezone.utc), + origin="https://cmdbts2", + path=b"A/B/C/a", ) == provenance_storage.content_find_first( hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494") ) for cnt in {cnt["sha1_git"] for cnt in data["content"]}: - assert adapt_result( - provenance.storage.content_find_first(cnt), - provenance_storage.with_path(), + assert provenance.storage.content_find_first( + cnt ) == provenance_storage.content_find_first(cnt) - assert { - adapt_result(occur, provenance_storage.with_path()) - for occur in provenance.storage.content_find_all(cnt) - } == set(provenance_storage.content_find_all(cnt)) + assert set(provenance.storage.content_find_all(cnt)) == set( + provenance_storage.content_find_all(cnt) + ) def test_types(self, provenance_storage: ProvenanceStorageInterface) -> None: """Checks all methods of ProvenanceStorageInterface are implemented by this @@ -479,15 +441,14 @@ dsts = {rel.dst for rels in data.values() for rel in rels} if dst != "origin": assert entity_add(storage, EntityType(dst), dsts) - if storage.with_path(): - assert storage.location_add( - { - hashlib.sha1(rel.path).digest(): rel.path - for rels in data.values() - for rel in rels - if rel.path is not None - } - ) + assert storage.location_add( + { + hashlib.sha1(rel.path).digest(): rel.path + for rels in data.values() + for rel in rels 
+ if rel.path is not None + } + ) assert data assert storage.relation_add(relation, data) @@ -496,7 +457,6 @@ relation_compare_result( storage.relation_get(relation, [src_sha1]), {src_sha1: data[src_sha1]}, - storage.with_path(), ) for dst_sha1 in dsts: relation_compare_result( @@ -510,22 +470,18 @@ for src_sha1, rels in data.items() if dst_sha1 in {rel.dst for rel in rels} }, - storage.with_path(), ) relation_compare_result( - storage.relation_get_all(relation), data, storage.with_path() + storage.relation_get_all(relation), + data, ) def relation_compare_result( computed: Dict[Sha1Git, Set[RelationData]], expected: Dict[Sha1Git, Set[RelationData]], - with_path: bool, ) -> None: assert { - src_sha1: { - RelationData(dst=rel.dst, path=rel.path if with_path else None) - for rel in rels - } + src_sha1: {RelationData(dst=rel.dst, path=rel.path) for rel in rels} for src_sha1, rels in expected.items() } == computed diff --git a/swh/provenance/tests/test_provenance_storage_without_path.py b/swh/provenance/tests/test_provenance_storage_denormalized.py rename from swh/provenance/tests/test_provenance_storage_without_path.py rename to swh/provenance/tests/test_provenance_storage_denormalized.py --- a/swh/provenance/tests/test_provenance_storage_without_path.py +++ b/swh/provenance/tests/test_provenance_storage_denormalized.py @@ -17,7 +17,7 @@ partial( initialize_database_for_module, modname="provenance", - flavor="without-path", + flavor="denormalized", version=ProvenanceStoragePostgreSql.current_version, ) ], diff --git a/swh/provenance/tests/test_provenance_storage_rabbitmq.py b/swh/provenance/tests/test_provenance_storage_rabbitmq.py --- a/swh/provenance/tests/test_provenance_storage_rabbitmq.py +++ b/swh/provenance/tests/test_provenance_storage_rabbitmq.py @@ -10,7 +10,7 @@ from swh.provenance import get_provenance_storage from swh.provenance.storage.interface import ProvenanceStorageInterface -from .test_provenance_storage import TestProvenanceStorage # noqa: F401 
+from .test_provenance_storage import TestProvenanceStorage as _TestProvenanceStorage @pytest.fixture() @@ -41,3 +41,8 @@ yield storage finally: server.stop() + + +@pytest.mark.rabbitmq +class TestProvenanceStorageRabbitMQ(_TestProvenanceStorage): + pass diff --git a/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py b/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py deleted file mode 100644 --- a/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2021-2022 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from functools import partial - -from pytest_postgresql import factories - -from swh.core.db.db_utils import initialize_database_for_module -from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql - -from .test_provenance_storage import TestProvenanceStorage # noqa: F401 - -provenance_postgresql_proc = factories.postgresql_proc( - load=[ - partial( - initialize_database_for_module, - modname="provenance", - flavor="with-path-denormalized", - version=ProvenanceStoragePostgreSql.current_version, - ) - ], -) diff --git a/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py b/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py deleted file mode 100644 --- a/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2021-2022 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from functools import partial - -from pytest_postgresql import factories - -from swh.core.db.db_utils import 
initialize_database_for_module -from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql - -from .test_provenance_storage import TestProvenanceStorage # noqa: F401 - -provenance_postgresql_proc = factories.postgresql_proc( - load=[ - partial( - initialize_database_for_module, - modname="provenance", - flavor="without-path-denormalized", - version=ProvenanceStoragePostgreSql.current_version, - ) - ], -) diff --git a/swh/provenance/tests/test_revision_content_layer.py b/swh/provenance/tests/test_revision_content_layer.py --- a/swh/provenance/tests/test_revision_content_layer.py +++ b/swh/provenance/tests/test_revision_content_layer.py @@ -191,11 +191,6 @@ "revision": set(), } - def maybe_path(path: str) -> Optional[bytes]: - if provenance.storage.with_path(): - return path.encode("utf-8") - return None - for synth_rev in synthetic_revision_content_result(syntheticfile): revision = revisions[synth_rev["sha1"]] entry = RevisionEntry( @@ -250,7 +245,7 @@ # check for R-C (direct) entries # these are added directly in the content_early_in_rev table rows["content_in_revision"] |= set( - (x["dst"], x["src"], maybe_path(x["path"])) for x in synth_rev["R_C"] + (x["dst"], x["src"], x["path"].encode()) for x in synth_rev["R_C"] ) assert rows["content_in_revision"] == { (src, rel.dst, rel.path) @@ -277,7 +272,7 @@ # ... + a number of rows in the "directory_in_rev" table... # check for R-D entries rows["directory_in_revision"] |= set( - (x["dst"], x["src"], maybe_path(x["path"])) for x in synth_rev["R_D"] + (x["dst"], x["src"], x["path"].encode()) for x in synth_rev["R_D"] ) assert rows["directory_in_revision"] == { (src, rel.dst, rel.path) @@ -296,7 +291,7 @@ # for content of the directory. 
# check for D-C entries rows["content_in_directory"] |= set( - (x["dst"], x["src"], maybe_path(x["path"])) for x in synth_rev["D_C"] + (x["dst"], x["src"], x["path"].encode()) for x in synth_rev["D_C"] ) assert rows["content_in_directory"] == { (src, rel.dst, rel.path) @@ -312,14 +307,13 @@ == provenance.storage.content_get([dc["dst"]])[dc["dst"]].timestamp() ), synth_rev["msg"] - if provenance.storage.with_path(): - # check for location entries - rows["location"] |= set(x["path"].encode() for x in synth_rev["R_C"]) - rows["location"] |= set(x["path"].encode() for x in synth_rev["D_C"]) - rows["location"] |= set(x["path"].encode() for x in synth_rev["R_D"]) - assert rows["location"] == set( - provenance.storage.location_get_all().values() - ), synth_rev["msg"] + # check for location entries + rows["location"] |= set(x["path"].encode() for x in synth_rev["R_C"]) + rows["location"] |= set(x["path"].encode() for x in synth_rev["D_C"]) + rows["location"] |= set(x["path"].encode() for x in synth_rev["R_D"]) + assert rows["location"] == set( + provenance.storage.location_get_all().values() + ), synth_rev["msg"] @pytest.mark.parametrize( @@ -353,11 +347,6 @@ for revision in data["revision"] ] - def maybe_path(path: str) -> str: - if provenance.storage.with_path(): - return path - return "" - if batch: revision_add(provenance, archive, revisions, lower=lower, mindepth=mindepth) else: @@ -376,12 +365,12 @@ for rc in synth_rev["R_C"]: expected_occurrences.setdefault(rc["dst"].hex(), []).append( - (rev_id, rev_ts, None, maybe_path(rc["path"])) + (rev_id, rev_ts, None, rc["path"]) ) for dc in synth_rev["D_C"]: assert dc["prefix"] is not None # to please mypy expected_occurrences.setdefault(dc["dst"].hex(), []).append( - (rev_id, rev_ts, None, maybe_path(dc["prefix"] + "/" + dc["path"])) + (rev_id, rev_ts, None, dc["prefix"] + "/" + dc["path"]) ) for content_id, results in expected_occurrences.items(): @@ -396,11 +385,7 @@ ) for occur in 
provenance.content_find_all(hash_to_bytes(content_id)) ] - if provenance.storage.with_path(): - # this is not true if the db stores no path, because a same content - # that appears several times in a given revision may be reported - # only once by content_find_all() - assert len(db_occurrences) == len(expected) + assert len(db_occurrences) == len(expected) assert set(db_occurrences) == set(expected) @@ -478,5 +463,4 @@ assert occur.revision.hex() == rev_id assert occur.date.timestamp() == ts assert occur.origin is None - if provenance.storage.with_path(): - assert occur.path.decode() in paths + assert occur.path.decode() in paths