diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -27,7 +27,7 @@ from swh.deposit.api.checks import check_metadata from swh.deposit.api.converters import convert_status_detail from swh.deposit.auth import HasDepositPermission, KeycloakBasicAuthentication -from swh.deposit.models import Deposit +from swh.deposit.models import Deposit, DEPOSIT_METADATA_ONLY from swh.deposit.parsers import parse_xml from swh.deposit.utils import NAMESPACES, compute_metadata_context from swh.model import hashutil @@ -868,6 +868,7 @@ if isinstance(swhid_ref, QualifiedSWHID): deposit.swhid = str(extended_swhid_from_qualified(swhid_ref)) deposit.swhid_context = str(swhid_ref) + deposit.type = DEPOSIT_METADATA_ONLY deposit.complete_date = depo_request.date deposit.reception_date = depo_request.date deposit.save() diff --git a/swh/deposit/migrations/0021_deposit_origin_url_20201124_1438.py b/swh/deposit/migrations/0021_deposit_origin_url_20201124_1438.py --- a/swh/deposit/migrations/0021_deposit_origin_url_20201124_1438.py +++ b/swh/deposit/migrations/0021_deposit_origin_url_20201124_1438.py @@ -21,7 +21,7 @@ migrations.AddField( model_name="deposit", name="origin_url", field=models.TextField(null=True), ), - migrations.RunPython(fill_origin_url), + # migrations.RunPython(fill_origin_url), migrations.AlterField( model_name="deposit", name="external_id", field=models.TextField(null=True), ), diff --git a/swh/deposit/migrations/0022_auto_20220223_1542.py b/swh/deposit/migrations/0022_auto_20220223_1542.py new file mode 100644 --- /dev/null +++ 
def fill_deposit_type(apps, schema_editor):
    """Backfill the new ``type`` column on deposits that predate it.

    A pre-existing deposit is flagged as a metadata-only deposit when its status
    is ``DEPOSIT_STATUS_LOAD_SUCCESS``, its complete date is set and exactly equal
    to its reception date, and both its ``swhid`` and ``swhid_context`` are filled
    in. Every other deposit is flagged as a code deposit.

    Args:
        apps: registry of historical models handed over by ``RunPython``
        schema_editor: schema editor handed over by ``RunPython`` (unused)

    """
    # Use the historical model matching the schema at this point of the migration
    # graph. The class importable from swh.deposit.models follows the latest code
    # and may reference columns that later migrations have not created yet.
    deposit_model = apps.get_model("deposit", "Deposit")

    # Two set-based UPDATEs rather than loading and saving every row one by one:
    # first everything is marked as code, then the metadata-only subset is flagged.
    deposit_model.objects.update(type=DEPOSIT_CODE)
    deposit_model.objects.filter(
        status=DEPOSIT_STATUS_LOAD_SUCCESS,
        complete_date__isnull=False,
        complete_date=models.F("reception_date"),
        swhid__isnull=False,
        swhid_context__isnull=False,
    ).update(type=DEPOSIT_METADATA_ONLY)


class Migration(migrations.Migration):
    """Add the ``type`` column to the deposit table and backfill existing rows."""

    dependencies = [
        ("deposit", "0021_deposit_origin_url_20201124_1438"),
    ]

    operations = [
        # The default is kept in the migration state (no preserve_default=False)
        # because the Deposit model declares that same default; dropping it from
        # the state would make the state and the model disagree.
        migrations.AddField(
            model_name="deposit",
            name="type",
            field=models.CharField(
                choices=DEPOSIT_TYPES, default=DEPOSIT_CODE, max_length=4,
            ),
        ),
        # Migrate the existing data; nothing to undo on the way back
        migrations.RunPython(
            fill_deposit_type, reverse_code=migrations.RunPython.noop,
        ),
    ]
-147,6 +156,8 @@ load_task_id = models.TextField( blank=True, null=True, verbose_name="Scheduler's associated loading task id" ) + type = models.CharField(max_length=4, choices=DEPOSIT_TYPES, default=DEPOSIT_CODE) + raw_metadata: Optional[str] = None class Meta: @@ -156,12 +167,14 @@ def __str__(self): d = { "id": self.id, + "type": self.type, + "status": self.status, "reception_date": self.reception_date, + "complete_date": self.complete_date, "collection": self.collection.name, "external_id": self.external_id, "origin_url": self.origin_url, "client": self.client.username, - "status": self.status, } if self.status in (DEPOSIT_STATUS_REJECTED): diff --git a/swh/deposit/tests/api/test_deposit_private_list.py b/swh/deposit/tests/api/test_deposit_private_list.py --- a/swh/deposit/tests/api/test_deposit_private_list.py +++ b/swh/deposit/tests/api/test_deposit_private_list.py @@ -8,7 +8,7 @@ from swh.deposit.api.converters import convert_status_detail from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS, PRIVATE_LIST_DEPOSITS -from swh.deposit.models import DepositClient +from swh.deposit.models import DEPOSIT_CODE, DEPOSIT_METADATA_ONLY, DepositClient from swh.deposit.tests.conftest import internal_create_deposit STATUS_DETAIL = { @@ -37,6 +37,8 @@ partial_deposit_with_metadata.save() deposit1 = partial_deposit_with_metadata deposit2 = partial_deposit_only_metadata + deposit2.type = DEPOSIT_METADATA_ONLY + deposit2.save() deposit3 = partial_deposit main_url = reverse(PRIVATE_LIST_DEPOSITS) @@ -56,6 +58,7 @@ expected_status_detail = convert_status_detail(STATUS_DETAIL) assert deposit_d["status_detail"] == expected_status_detail assert deposit_d["raw_metadata"] is not None + assert deposit_d["type"] == DEPOSIT_CODE assert ( deposit_d["raw_metadata"] == deposit1.depositrequest_set.filter(type="metadata")[0].raw_metadata @@ -77,6 +80,7 @@ assert deposit2_d["id"] == deposit2.id assert deposit2_d["status"] == deposit2.status assert deposit2_d["raw_metadata"] is not 
None + assert deposit2_d["type"] == DEPOSIT_METADATA_ONLY assert ( deposit2_d["raw_metadata"] == deposit2.depositrequest_set.filter(type="metadata")[0].raw_metadata @@ -97,6 +101,7 @@ deposit3_d = data_p3["results"][0] assert deposit3_d["id"] == deposit3.id assert deposit3_d["status"] == deposit3.status + assert deposit3_d["type"] == DEPOSIT_CODE assert not deposit3.depositrequest_set.filter( type="metadata" ), "No metadata type request for that deposit" diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py --- a/swh/deposit/tests/conftest.py +++ b/swh/deposit/tests/conftest.py @@ -200,6 +200,7 @@ ("PORT", postgresql_proc.port), # noqa } ) + with django_db_blocker.unblock(): setup_databases( verbosity=request.config.option.verbose, interactive=False, keepdb=False diff --git a/swh/deposit/tests_migration/test_migrations.py b/swh/deposit/tests_migration/test_migrations.py --- a/swh/deposit/tests_migration/test_migrations.py +++ b/swh/deposit/tests_migration/test_migrations.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -8,6 +8,16 @@ # assert something is not there, trigger the next migration and check the last state is # as expected. That's what's the following scenarios do. 
def now() -> datetime:
    """Return the current date and time as a timezone-aware UTC datetime."""
    return datetime.now(timezone.utc)


def test_migrations_22_add_deposit_type_column_model_and_data(migrator):
    """Migration 22 adds the deposit type column and backfills pre-existing rows"""
    from swh.deposit.models import (
        DEPOSIT_CODE,
        DEPOSIT_METADATA_ONLY,
        Deposit,
        DepositClient,
        DepositCollection,
    )

    # Start from the schema as it was right before the migration under test
    previous_state = migrator.apply_initial_migration(
        ("deposit", "0021_deposit_origin_url_20201124_1438")
    )
    legacy_deposit = previous_state.apps.get_model("deposit", "Deposit")

    collection = DepositCollection.objects.create(name="hello")
    client = DepositClient.objects.create(username="name", collections=[collection.id])
    ownership = {"client_id": client.id, "collection_id": collection.id}

    # Two plain deposits which must end up typed as code deposits
    code_deposit_ids = {
        legacy_deposit.objects.create(status=status, **ownership).id
        for status in ("partial", "verified")
    }

    origin = "https://hal.archives-ouvertes.fr/hal-01727745"
    directory = hash_to_bytes("42a13fc721c8716ff695d0d62fc851d641f3a12b")
    release = CoreSWHID(
        object_type=ObjectType.RELEASE,
        object_id=hash_to_bytes("548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10"),
    )
    snapshot = CoreSWHID(
        object_type=ObjectType.SNAPSHOT,
        object_id=hash_to_bytes("e5e82d064a9c3df7464223042e0c55d72ccff7f0"),
    )

    timestamp = now()
    # One deposit shaped like a metadata-only deposit: loaded successfully, with
    # both swhid and swhid_context filled in
    metadata_deposit = legacy_deposit.objects.create(
        status=DEPOSIT_STATUS_LOAD_SUCCESS,
        swhid=CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=directory),
        swhid_context=QualifiedSWHID(
            object_type=ObjectType.DIRECTORY,
            object_id=directory,
            origin=origin,
            visit=snapshot,
            anchor=release,
            path=b"/",
        ),
        **ownership,
    )
    # Workaround: the dates are assigned after creation and saved again so that
    # complete date and reception date hold the exact same value
    metadata_deposit.complete_date = timestamp
    metadata_deposit.reception_date = timestamp
    metadata_deposit.save()

    assert not hasattr(legacy_deposit, "type")

    # Move to the schema produced by the migration under test
    upgraded_state = migrator.apply_tested_migration(
        ("deposit", "0022_auto_20220223_1542")
    )
    upgraded_deposit = upgraded_state.apps.get_model("deposit", "Deposit")

    assert hasattr(upgraded_deposit, "type")

    # A brand new deposit defaults to the code type
    assert Deposit().type == DEPOSIT_CODE

    migrated = list(Deposit.objects.all())
    assert len(migrated) == 3
    for deposit in migrated:
        if deposit.id in code_deposit_ids:
            assert deposit.type == DEPOSIT_CODE
            continue
        assert (
            deposit.id == metadata_deposit.id
            and deposit.type == DEPOSIT_METADATA_ONLY
        )