diff --git a/swh/storage/postgresql/converters.py b/swh/storage/postgresql/converters.py --- a/swh/storage/postgresql/converters.py +++ b/swh/storage/postgresql/converters.py @@ -214,8 +214,12 @@ db_revision["committer_date"], db_revision["committer_date_offset_bytes"], ) - assert author, "author is None" - assert committer, "committer is None" + assert (author is None) == ( + db_revision["author_fullname"] is None + ), "author is unexpectedly None" + assert (committer is None) == ( + db_revision["committer_fullname"] is None + ), "committer is unexpectedly None" parents = [] if "parents" in db_revision: @@ -234,9 +238,9 @@ return Revision( id=db_revision["id"], - author=author, + author=author, # type: ignore # will pass in swh-model v5.1.0 date=date, - committer=committer, + committer=committer, # type: ignore # will pass in swh-model v5.1.0 committer_date=committer_date, type=RevisionType(db_revision["type"]), directory=db_revision["directory"], diff --git a/swh/storage/sql/40-funcs.sql b/swh/storage/sql/40-funcs.sql --- a/swh/storage/sql/40-funcs.sql +++ b/swh/storage/sql/40-funcs.sql @@ -562,7 +562,7 @@ select committer_fullname as fullname, committer_name as name, committer_email as email from tmp_revision ) insert into person (fullname, name, email) select distinct on (fullname) fullname, name, email from t - where not exists ( + where t.fullname is not null and not exists ( select 1 from person p where t.fullname = p.fullname diff --git a/swh/storage/sql/60-indexes.sql b/swh/storage/sql/60-indexes.sql --- a/swh/storage/sql/60-indexes.sql +++ b/swh/storage/sql/60-indexes.sql @@ -149,6 +149,14 @@ validate constraint revision_date_offset_not_null; alter table revision validate constraint revision_committer_date_offset_not_null; + + -- if the author is null, then the date must be null + alter table revision add constraint revision_author_date_check check ((date is null) or (author is not null)) not valid; + alter table revision validate constraint revision_author_date_check; + + -- if the committer is null, then the committer_date must be null + alter table revision add constraint revision_committer_date_check check ((committer_date is null) or (committer is not null)) not valid; + alter table revision validate constraint revision_committer_date_check; \endif \if :dbflavor_default diff --git a/swh/storage/sql/upgrades/183.sql b/swh/storage/sql/upgrades/183.sql new file mode 100644 --- /dev/null +++ b/swh/storage/sql/upgrades/183.sql @@ -0,0 +1,42 @@ +-- SWH DB schema upgrade +-- from_version: 182 +-- to_version: 183 +-- description: add support for NULL as revision.author or revision.committer + +insert into dbversion(version, release, description) + values(183, now(), 'Work In Progress'); + +select swh_get_dbflavor() = 'read_replica' as dbflavor_read_replica \gset +select swh_get_dbflavor() != 'read_replica' as dbflavor_does_deduplication \gset +select swh_get_dbflavor() = 'mirror' as dbflavor_mirror \gset +select swh_get_dbflavor() = 'default' as dbflavor_default \gset + +\if :dbflavor_does_deduplication + -- if the author is null, then the date must be null + alter table revision add constraint revision_author_date_check check ((date is null) or (author is not null)) not valid; + alter table revision validate constraint revision_author_date_check; + + -- if the committer is null, then the committer_date must be null + alter table revision add constraint revision_committer_date_check check ((committer_date is null) or (committer is not null)) not valid; + alter table revision validate constraint revision_committer_date_check; +\endif + +create or replace function swh_person_add_from_revision() + returns void + language plpgsql +as $$ +begin + with t as ( + select author_fullname as fullname, author_name as name, author_email as email from tmp_revision + union + select committer_fullname as fullname, committer_name as name, committer_email as email from tmp_revision + ) insert into person (fullname, name, email) + select distinct on (fullname) fullname, name, email from t + where t.fullname is not null and not exists ( + select 1 + from person p + where t.fullname = p.fullname + ); + return; +end +$$; diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py --- a/swh/storage/tests/storage_tests.py +++ b/swh/storage/tests/storage_tests.py @@ -1,9 +1,10 @@ -# Copyright (C) 2015-2021 The Software Heritage developers +# Copyright (C) 2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from collections import defaultdict +import contextlib import datetime from datetime import timedelta import inspect @@ -55,6 +56,16 @@ ) +@contextlib.contextmanager +def disable_attrs_validator(): + v = attr.validators.get_disabled() + try: + attr.validators.set_disabled(True) + yield + finally: + attr.validators.set_disabled(v) + + def transform_entries( storage: StorageInterface, dir_: Directory, *, prefix: bytes = b"" ) -> Iterator[Dict[str, Any]]: @@ -1308,6 +1319,52 @@ end_missing = swh_storage.revision_missing(ids) assert set(end_missing) == set(ids) - {revision.id} + def test_revision_add_no_author_or_date(self, swh_storage, sample_data): + full_revision = sample_data.revision + + with disable_attrs_validator(): + # TODO: remove context manager when support for author=None + # lands in swh-model + revision = attr.evolve(full_revision, author=None, date=None) + revision = attr.evolve(revision, id=revision.compute_hash()) + actual_result = swh_storage.revision_add([revision]) + assert actual_result == {"revision:add": 1} + + end_missing = swh_storage.revision_missing([revision.id]) + assert list(end_missing) == [] + + assert list(swh_storage.journal_writer.journal.objects) == [ + ("revision", revision) + ] + + with disable_attrs_validator(): + # TODO: remove context manager when support for author=None + # lands in swh-model + assert swh_storage.revision_get([revision.id]) == [revision] + + def test_revision_add_no_committer_or_date(self, swh_storage, sample_data): + full_revision = sample_data.revision + + with disable_attrs_validator(): + # TODO: remove context manager when support for author=None + # lands in swh-model + revision = attr.evolve(full_revision, committer=None, committer_date=None) + revision = attr.evolve(revision, id=revision.compute_hash()) + actual_result = swh_storage.revision_add([revision]) + assert actual_result == {"revision:add": 1} + + end_missing = swh_storage.revision_missing([revision.id]) + assert list(end_missing) == [] + + assert list(swh_storage.journal_writer.journal.objects) == [ + ("revision", revision) + ] + + with disable_attrs_validator(): + # TODO: remove context manager when support for author=None + # lands in swh-model + assert swh_storage.revision_get([revision.id]) == [revision] + def test_extid_add_git(self, swh_storage, sample_data): gitids = [