diff --git a/swh/deposit/migrations/0018_deposit_migrate_swhids.py b/swh/deposit/migrations/0018_deposit_migrate_swhids.py new file mode 100644 --- /dev/null +++ b/swh/deposit/migrations/0018_deposit_migrate_swhids.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import logging + +from django.db import migrations, models +from typing import Optional + +from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS +from swh.model.hashutil import hash_to_hex +from swh.model.identifiers import ( + parse_persistent_identifier, + persistent_identifier, + DIRECTORY, + REVISION, + SNAPSHOT, +) +from swh.storage import get_storage as get_storage_client + + +logger = logging.getLogger(__name__) + + +swh_storage = None + + +def get_storage(): + """Instantiate a storage client + + """ + global swh_storage + if not swh_storage: + swh_storage = get_storage_client( + cls="remote", url="http://uffizi.internal.sofwareheritage.org:5002" + ) + return swh_storage + + +def get_snapshot(origin: str, revision_id: str) -> Optional[str]: + """Retrieve the snapshot targetting the revision_id for the given origin. + + """ + storage = get_storage() + all_visits = storage.origin_visit_get(origin) + for visit in all_visits: + if not visit["snapshot"]: + continue + detail_snapshot = storage.snapshot_get(visit["snapshot"]) + if not detail_snapshot: + continue + for branch in detail_snapshot: + if branch["target_type"] == "revision": + revision = branch["target"] + if revision == hash_to_hex(revision): + # Found the snapshot + return hash_to_hex(visit["snapshot"]) + + +def migrate_deposit_swhid_context_not_null(apps, schema_editor): + """Migrate deposit SWHIDs to the new format. + + Migrate deposit SWHIDs to the new format. Only deposit with status done and + swh_id_context not null are concerned. + + """ + for deposit in models.Deposit.objects.filter( + status=DEPOSIT_STATUS_LOAD_SUCCESS, swh_id_context__isnull=False + ): + obj_dir = parse_persistent_identifier(deposit.swh_id_context) + obj_rev = parse_persistent_identifier(deposit.swh_anchor_id) + + dir_id = obj_dir.object_id + origin = obj_dir.metadata["origin"] + rev_id = obj_rev.object_id + + snp_id = get_snapshot(origin, rev_id) # retrieve the snapshot from the archive + if not snp_id: + logger.warning("Snapshot not found for deposit id %s!", deposit.id) + continue + deposit.swh_id_context = persistent_identifier( + DIRECTORY, + dir_id, + metadata={ + "origin": origin, + "visit": persistent_identifier(SNAPSHOT, snp_id), + "anchor": persistent_identifier(REVISION, rev_id), + "path": "/", + }, + ) + + deposit.save() + + +def migrate_deposit_swhid_context_null(apps, schema_editor): + """Migrate deposit SWHIDs to the new format. + + Migrate deposit whose swh_id_context is not set (initial deposits not migrated at + the time). Only deposit with status done and swh_id_context null are concerned. + + Note: Those deposits have their swh_id being the SWHPIDs of the revision! So we can + align them as well. + + """ + pass + + +class Migration(migrations.Migration): + dependencies = [ + ("deposit", "0017_auto_20190925_0906.py"), + ] + + operations = [ + migrations.RunPython(migrate_deposit_swhid_context_not_null), + migrations.RunPython(migrate_deposit_swhid_context_null), + ]