diff --git a/requirements-test.txt b/requirements-test.txt --- a/requirements-test.txt +++ b/requirements-test.txt @@ -6,6 +6,7 @@ pytest < 7.0.0 # v7.0.0 removed _pytest.tmpdir.TempdirFactory, which is used by some of the pytest plugins we use pytest-django pytest-mock +pytest-postgresql requests-mock != 1.9.0, != 1.9.1 swh.core[http] >= 0.0.95 swh.loader.git >= 0.8.0 diff --git a/swh/web/auth/management/__init__.py b/swh/web/auth/management/__init__.py new file mode 100644 diff --git a/swh/web/auth/management/commands/__init__.py b/swh/web/auth/management/commands/__init__.py new file mode 100644 diff --git a/swh/web/auth/management/commands/sync_mailmaps.py b/swh/web/auth/management/commands/sync_mailmaps.py new file mode 100644 --- /dev/null +++ b/swh/web/auth/management/commands/sync_mailmaps.py @@ -0,0 +1,118 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import psycopg2 +import psycopg2.extensions +from psycopg2.extras import execute_values + +from django.core.management.base import BaseCommand +from django.db import transaction +from django.db.models import F +from django.db.models.query import QuerySet +from django.utils import timezone + +from swh.web.auth.models import UserMailmap + +DISABLE_MAILMAPS_QUERY = """\ +UPDATE person + SET displayname = NULL + FROM (VALUES %s) AS emails (email) + WHERE person.email = emails.email +""" + +REFRESH_MAILMAPS_QUERY = """\ +UPDATE person + SET displayname = displaynames.displayname + FROM (VALUES %s) AS displaynames (email, displayname) + WHERE + person.email = displaynames.email + AND person.displayname IS DISTINCT FROM displaynames.displayname +""" + + +class Command(BaseCommand): + help = "Synchronize the mailmaps with swh.storage" + + def add_arguments(self, parser): + parser.add_argument("storage_dbconn", type=str) + parser.add_argument( + "--perform", + action="store_true", + help="Perform actions (instead of the default dry-run)", + ) + + def disable_mailmaps( + self, + storage_db: psycopg2.extensions.connection, + mailmaps: "QuerySet[UserMailmap]", + ): + """Return the SQL to disable a set of mailmaps""" + + execute_values( + storage_db.cursor(), + DISABLE_MAILMAPS_QUERY, + ((mailmap.from_email.encode("utf-8"),) for mailmap in mailmaps), + ) + + def refresh_mailmaps( + self, + storage_db: psycopg2.extensions.connection, + mailmaps: "QuerySet[UserMailmap]", + ): + + execute_values( + storage_db.cursor(), + REFRESH_MAILMAPS_QUERY, + ( + ( + mailmap.from_email.encode("utf-8"), + mailmap.full_display_name.encode("utf-8"), + ) + for mailmap in mailmaps + ), + ) + + def handle(self, *args, **options): + verified_mailmaps = UserMailmap.objects.filter(from_email_verified=True) + + # Always refresh display names for person entries with known emails + to_refresh = verified_mailmaps.filter(display_name_activated=True) + + # Only remove display_names if they've been deactivated since they've last been + # processed + to_disable = verified_mailmaps.filter( + display_name_activated=False, + mailmap_last_processing_date__lt=F("last_update_date"), + ) + + process_start = timezone.now() + with transaction.atomic(): + self.stdout.write( + "%d mailmaps to disable, %d mailmaps to refresh%s" + % ( + to_disable.count(), + to_refresh.count(), + (" (dry run)" if not options["perform"] else ""), + ) + ) + + with psycopg2.connect(options["storage_dbconn"]) as db: + self.disable_mailmaps(db, to_disable.select_for_update()) + self.refresh_mailmaps(db, to_refresh.select_for_update()) + if not options["perform"]: + db.rollback() + else: + db.commit() + + if options["perform"]: + updated = to_disable.update( + mailmap_last_processing_date=process_start + ) + to_refresh.update(mailmap_last_processing_date=process_start) + else: + updated = to_disable.count() + to_refresh.count() + + self.stdout.write( + self.style.SUCCESS(f"Synced {updated} mailmaps to swh.storage database") + ) diff --git a/swh/web/auth/models.py b/swh/web/auth/models.py --- a/swh/web/auth/models.py +++ b/swh/web/auth/models.py @@ -20,6 +20,19 @@ db_table = "oidc_user_offline_tokens" +class UserMailmapManager(models.Manager): + """A queryset manager which defers all fields named `*_date` by default, to avoid + resetting them to an old value involuntarily.""" + + def get_queryset(self): + to_defer = [ + field.name + for field in UserMailmap._meta.get_fields() + if field.name.endswith("_date") + ] + return super().get_queryset().defer(*to_defer) + + class UserMailmap(models.Model): """ Model storing mailmap settings submitted by users. @@ -61,3 +74,13 @@ class Meta: app_label = "swh_web_auth" db_table = "user_mailmap" + + # Defer _date fields by default to avoid updating them by mistake + objects = UserMailmapManager() + + @property + def full_display_name(self) -> str: + if self.to_email is not None and self.to_email_verified: + return "%s <%s>" % (self.display_name, self.to_email) + else: + return self.display_name diff --git a/swh/web/tests/auth/test_mailmap.py b/swh/web/tests/auth/test_mailmap.py --- a/swh/web/tests/auth/test_mailmap.py +++ b/swh/web/tests/auth/test_mailmap.py @@ -3,8 +3,17 @@ # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information +from io import StringIO +from typing import Dict + +from psycopg2.extras import execute_values import pytest +from django.core.management import call_command +from django.db import transaction + +from swh.model.model import Person +from swh.web.auth.models import UserMailmap from swh.web.auth.utils import MAILMAP_PERMISSION from swh.web.common.utils import reverse from swh.web.tests.utils import check_api_post_response, create_django_permission @@ -43,3 +52,232 @@ url = reverse(view_name) resp = check_api_post_response(api_client, url, status_code=500) assert "exception" in resp.data + + +def populate_mailmap(): + for (verified, activated) in ( + (False, False), + (False, True), + (True, False), + (True, True), + ): + verified_str = "V" if verified else "" + activated_str = "A" if activated else "" + UserMailmap.objects.create( + from_email=f"from_email{verified_str}{activated_str}@example.com", + display_name=f"Display Name {verified_str} {activated_str}".strip(), + from_email_verified=verified, + display_name_activated=activated, + ) + + +def call_sync_mailmaps(*args) -> str: + out = StringIO() + err = StringIO() + + call_command("sync_mailmaps", *args, stdout=out, stderr=err) + + out.seek(0) + err.seek(0) + assert err.read() == "" + return out.read() + + +MAILMAP_KNOWN_FULLNAMES = ( + "Original Name ", + "Original Name V ", + "Original Name A ", + "Original Name V A ", + "Original Name V A 2 ", + "Original Name V A 3 ", +) + + +MAILMAP_KNOWN_PEOPLE = tuple( + Person.from_fullname(f.encode()) for f in MAILMAP_KNOWN_FULLNAMES +) + + +def init_stub_storage_db(postgresql): + cur = postgresql.cursor() + cur.execute( + """ + CREATE TABLE person ( + fullname bytea PRIMARY KEY, + name bytea, + email bytea, + displayname bytea + ) + """ + ) + execute_values( + cur, + "INSERT INTO person (fullname, name, email) VALUES %s", + (p.to_dict() for p in MAILMAP_KNOWN_PEOPLE), + template="(%(fullname)s, %(name)s, %(email)s)", + ) + cur.execute("CREATE INDEX ON person (email)") + postgresql.commit() + cur.close() + + return postgresql.dsn + + +def get_displaynames(postgresql) -> Dict[str, str]: + with postgresql.cursor() as cur: + cur.execute( + "SELECT fullname, displayname FROM person WHERE displayname IS NOT NULL" + ) + + return {bytes(f).decode("utf-8"): bytes(d).decode("utf-8") for (f, d) in cur} + + +@pytest.mark.django_db(transaction=True) +def test_sync_mailmaps_dry_run(postgresql): + with transaction.atomic(): + populate_mailmap() + + dsn = init_stub_storage_db(postgresql) + + out = call_sync_mailmaps(dsn) + + assert "(dry run)" in out + assert "Synced 1 mailmaps to swh.storage database" in out + + assert get_displaynames(postgresql) == {} + + assert ( + UserMailmap.objects.filter( + from_email_verified=True, + display_name_activated=True, + mailmap_last_processing_date__isnull=False, + ).count() + == 0 + ) + + +@pytest.mark.django_db(transaction=True) +def test_sync_mailmaps_perform(postgresql): + with transaction.atomic(): + populate_mailmap() + + dsn = init_stub_storage_db(postgresql) + + out = call_sync_mailmaps("--perform", dsn) + + assert "(dry run)" not in out + assert "Synced 1 mailmaps to swh.storage database" in out + + expected_displaynames = { + "Original Name V A ": "Display Name V A", + "Original Name V A 2 ": "Display Name V A", + "Original Name V A 3 ": "Display Name V A", + } + + assert get_displaynames(postgresql) == expected_displaynames + + assert ( + UserMailmap.objects.filter( + from_email_verified=True, + display_name_activated=True, + mailmap_last_processing_date__isnull=False, + ).count() + == 1 + ) + + +@pytest.mark.django_db(transaction=True) +def test_sync_mailmaps_with_to_email(postgresql): + with transaction.atomic(): + populate_mailmap() + + dsn = init_stub_storage_db(postgresql) + + call_sync_mailmaps("--perform", dsn) + + expected_displaynames = { + "Original Name V A ": "Display Name V A", + "Original Name V A 2 ": "Display Name V A", + "Original Name V A 3 ": "Display Name V A", + } + + assert get_displaynames(postgresql) == expected_displaynames + + # Add a non-valid to_email + with transaction.atomic(): + for mailmap in UserMailmap.objects.filter( + from_email_verified=True, display_name_activated=True + ): + mailmap.to_email = "to_email@example.com" + mailmap.save() + + call_sync_mailmaps("--perform", dsn) + + assert get_displaynames(postgresql) == expected_displaynames + + # Verify the relevant to_email + with transaction.atomic(): + for mailmap in UserMailmap.objects.filter( + from_email_verified=True, display_name_activated=True + ): + mailmap.to_email_verified = True + mailmap.save() + + call_sync_mailmaps("--perform", dsn) + + new_displayname = "Display Name V A " + expected_displaynames = { + "Original Name V A ": new_displayname, + "Original Name V A 2 ": new_displayname, + "Original Name V A 3 ": new_displayname, + } + + assert get_displaynames(postgresql) == expected_displaynames + + +@pytest.mark.django_db(transaction=True) +def test_sync_mailmaps_disable(postgresql): + """Check that disabling a mailmap only happens once""" + with transaction.atomic(): + populate_mailmap() + + dsn = init_stub_storage_db(postgresql) + + # Do the initial mailmap sync + call_sync_mailmaps("--perform", dsn) + + assert len(get_displaynames(postgresql)) == 3 + + # Disable a display name + with transaction.atomic(): + # Cannot use update() because `last_update_date` would not be updated + for mailmap in UserMailmap.objects.filter( + from_email_verified=True, display_name_activated=True + ): + mailmap.display_name_activated = False + mailmap.save() + + # Sync mailmaps again + out = call_sync_mailmaps("--perform", dsn) + assert "1 mailmaps to disable" in out + + assert get_displaynames(postgresql) == {} + + # Update a displayname by hand + with postgresql.cursor() as cur: + cur.execute( + "UPDATE person SET displayname='Manual Display Name' " + "WHERE fullname='Original Name V A '" + ) + + expected_displaynames = { + "Original Name V A ": "Manual Display Name" + } + + assert get_displaynames(postgresql) == expected_displaynames + + # Sync mailmaps one last time. No mailmaps should be disabled + out = call_sync_mailmaps("--perform", dsn) + assert "0 mailmaps to disable" in out + + assert get_displaynames(postgresql) == expected_displaynames