diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,4 +1,4 @@ swh.core[db,http] >= 2 swh.counters >= v0.8.0 -swh.model >= 4.4.0 +swh.model >= 6.0.0 swh.objstorage >= 0.2.2 diff --git a/swh/storage/postgresql/converters.py b/swh/storage/postgresql/converters.py --- a/swh/storage/postgresql/converters.py +++ b/swh/storage/postgresql/converters.py @@ -214,8 +214,12 @@ db_revision["committer_date"], db_revision["committer_date_offset_bytes"], ) - assert author, "author is None" - assert committer, "committer is None" + assert (author is None) == ( + db_revision["author_fullname"] is None + ), "author is unexpectedly None" + assert (committer is None) == ( + db_revision["committer_fullname"] is None + ), "committer is unexpectedly None" parents = [] if "parents" in db_revision: diff --git a/swh/storage/proxies/buffer.py b/swh/storage/proxies/buffer.py --- a/swh/storage/proxies/buffer.py +++ b/swh/storage/proxies/buffer.py @@ -65,8 +65,10 @@ if revision.message: s += len(revision.message) - s += len(revision.author.fullname) - s += len(revision.committer.fullname) + if revision.author is not None: + s += len(revision.author.fullname) + if revision.committer is not None: + s += len(revision.committer.fullname) s += sum(len(h) + len(v) for h, v in revision.extra_headers) return s diff --git a/swh/storage/sql/40-funcs.sql b/swh/storage/sql/40-funcs.sql --- a/swh/storage/sql/40-funcs.sql +++ b/swh/storage/sql/40-funcs.sql @@ -562,7 +562,7 @@ select committer_fullname as fullname, committer_name as name, committer_email as email from tmp_revision ) insert into person (fullname, name, email) select distinct on (fullname) fullname, name, email from t - where not exists ( + where t.fullname is not null and not exists ( select 1 from person p where t.fullname = p.fullname diff --git a/swh/storage/sql/60-indexes.sql b/swh/storage/sql/60-indexes.sql --- a/swh/storage/sql/60-indexes.sql +++ b/swh/storage/sql/60-indexes.sql @@ -149,6 +149,14 @@ validate constraint revision_date_offset_not_null; alter table revision validate constraint revision_committer_date_offset_not_null; + + -- if the author is null, then the date must be null + alter table revision add constraint revision_author_date_check check ((date is null) or (author is not null)) not valid; + alter table revision validate constraint revision_author_date_check; + + -- if the committer is null, then the committer_date must be null + alter table revision add constraint revision_committer_date_check check ((committer_date is null) or (committer is not null)) not valid; + alter table revision validate constraint revision_committer_date_check; \endif \if :dbflavor_default diff --git a/swh/storage/sql/upgrades/183.sql b/swh/storage/sql/upgrades/183.sql new file mode 100644 --- /dev/null +++ b/swh/storage/sql/upgrades/183.sql @@ -0,0 +1,42 @@ +-- SWH DB schema upgrade +-- from_version: 182 +-- to_version: 183 +-- description: add support for NULL as revision.author or revision.committer + +insert into dbversion(version, release, description) + values(183, now(), 'Work In Progress'); + +select swh_get_dbflavor() = 'read_replica' as dbflavor_read_replica \gset +select swh_get_dbflavor() != 'read_replica' as dbflavor_does_deduplication \gset +select swh_get_dbflavor() = 'mirror' as dbflavor_mirror \gset +select swh_get_dbflavor() = 'default' as dbflavor_default \gset + +\if :dbflavor_does_deduplication + -- if the author is null, then the date must be null + alter table revision add constraint revision_author_date_check check ((date is null) or (author is not null)) not valid; + alter table revision validate constraint revision_author_date_check; + + -- if the committer is null, then the committer_date must be null + alter table revision add constraint revision_committer_date_check check ((committer_date is null) or (committer is not null)) not valid; + alter table revision validate constraint revision_committer_date_check; +\endif + +create or replace function swh_person_add_from_revision() + returns void + language plpgsql +as $$ +begin + with t as ( + select author_fullname as fullname, author_name as name, author_email as email from tmp_revision + union + select committer_fullname as fullname, committer_name as name, committer_email as email from tmp_revision + ) insert into person (fullname, name, email) + select distinct on (fullname) fullname, name, email from t + where t.fullname is not null and not exists ( + select 1 + from person p + where t.fullname = p.fullname + ); + return; +end +$$; diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py --- a/swh/storage/tests/storage_tests.py +++ b/swh/storage/tests/storage_tests.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2021 The Software Heritage developers +# Copyright (C) 2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -1150,8 +1150,12 @@ revision, synthetic=False, metadata=None, - committer=Person.from_fullname(revision.committer.fullname), - author=Person.from_fullname(revision.author.fullname), + author=None + if revision.author is None + else Person.from_fullname(revision.author.fullname), + committer=None + if revision.committer is None + else Person.from_fullname(revision.committer.fullname), type=RevisionType.GIT, ) for revision in revisions @@ -1308,6 +1312,40 @@ end_missing = swh_storage.revision_missing(ids) assert set(end_missing) == set(ids) - {revision.id} + def test_revision_add_no_author_or_date(self, swh_storage, sample_data): + full_revision = sample_data.revision + + revision = attr.evolve(full_revision, author=None, date=None) + revision = attr.evolve(revision, id=revision.compute_hash()) + actual_result = swh_storage.revision_add([revision]) + assert actual_result == {"revision:add": 1} + + end_missing = swh_storage.revision_missing([revision.id]) + assert list(end_missing) == [] + + assert list(swh_storage.journal_writer.journal.objects) == [ + ("revision", revision) + ] + + assert swh_storage.revision_get([revision.id]) == [revision] + + def test_revision_add_no_committer_or_date(self, swh_storage, sample_data): + full_revision = sample_data.revision + + revision = attr.evolve(full_revision, committer=None, committer_date=None) + revision = attr.evolve(revision, id=revision.compute_hash()) + actual_result = swh_storage.revision_add([revision]) + assert actual_result == {"revision:add": 1} + + end_missing = swh_storage.revision_missing([revision.id]) + assert list(end_missing) == [] + + assert list(swh_storage.journal_writer.journal.objects) == [ + ("revision", revision) + ] + + assert swh_storage.revision_get([revision.id]) == [revision] + def test_extid_add_git(self, swh_storage, sample_data): gitids = [ diff --git a/swh/storage/tests/test_tenacious.py b/swh/storage/tests/test_tenacious.py --- a/swh/storage/tests/test_tenacious.py +++ b/swh/storage/tests/test_tenacious.py @@ -364,7 +364,7 @@ def test_tenacious_proxy_storage_rate_limit( object_type, add_func_name, objects, bad1, bad2 ): - storage = get_tenacious_storage(error_rate_limit={"errors": 1, "window_size": 3}) + storage = get_tenacious_storage(error_rate_limit={"errors": 1, "window_size": 2}) tenacious = storage.storage in_memory = tenacious.storage assert isinstance(tenacious, TenaciousProxyStorage)