Changeset View
Changeset View
Standalone View
Standalone View
swh/deposit/migrations/0018_migrate_swhids.py
# -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||
from __future__ import unicode_literals | from __future__ import unicode_literals | ||||
import os | import os | ||||
import logging | import logging | ||||
from django.db import migrations | from django.db import migrations | ||||
from typing import Any, Dict, Optional, Tuple | from typing import Any, Dict, Optional, Tuple | ||||
from swh.core import config | from swh.core import config | ||||
from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS | from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS | ||||
from swh.model.hashutil import hash_to_bytes, hash_to_hex | from swh.model.hashutil import hash_to_bytes, hash_to_hex | ||||
from swh.model.identifiers import ( | from swh.model.identifiers import ( | ||||
parse_persistent_identifier, | parse_swhid, | ||||
persistent_identifier, | swhid, | ||||
DIRECTORY, | DIRECTORY, | ||||
REVISION, | REVISION, | ||||
SNAPSHOT, | SNAPSHOT, | ||||
) | ) | ||||
from swh.storage import get_storage as get_storage_client | from swh.storage import get_storage as get_storage_client | ||||
SWH_PROVIDER_URL = "https://www.softwareheritage.org" | SWH_PROVIDER_URL = "https://www.softwareheritage.org" | ||||
▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines | def migrate_deposit_swhid_context_not_null(apps, schema_editor): | ||||
if not storage: | if not storage: | ||||
logging.warning("Nothing to do") | logging.warning("Nothing to do") | ||||
return None | return None | ||||
Deposit = apps.get_model("deposit", "Deposit") | Deposit = apps.get_model("deposit", "Deposit") | ||||
for deposit in Deposit.objects.filter( | for deposit in Deposit.objects.filter( | ||||
status=DEPOSIT_STATUS_LOAD_SUCCESS, swh_id_context__isnull=False | status=DEPOSIT_STATUS_LOAD_SUCCESS, swh_id_context__isnull=False | ||||
): | ): | ||||
obj_dir = parse_persistent_identifier(deposit.swh_id_context) | obj_dir = parse_swhid(deposit.swh_id_context) | ||||
assert obj_dir.object_type == DIRECTORY | assert obj_dir.object_type == DIRECTORY | ||||
obj_rev = parse_persistent_identifier(deposit.swh_anchor_id) | obj_rev = parse_swhid(deposit.swh_anchor_id) | ||||
assert obj_rev.object_type == REVISION | assert obj_rev.object_type == REVISION | ||||
if set(obj_dir.metadata.keys()) != {"origin"}: | if set(obj_dir.metadata.keys()) != {"origin"}: | ||||
# Assuming the migration is already done for that deposit | # Assuming the migration is already done for that deposit | ||||
logger.warning( | logger.warning( | ||||
"Deposit id %s: Migration already done, skipping", deposit.id | "Deposit id %s: Migration already done, skipping", deposit.id | ||||
) | ) | ||||
continue | continue | ||||
Show All 21 Lines | ): | ||||
# Reference the old values to do some checks later | # Reference the old values to do some checks later | ||||
old_swh_id = deposit.swh_id | old_swh_id = deposit.swh_id | ||||
old_swh_id_context = deposit.swh_id_context | old_swh_id_context = deposit.swh_id_context | ||||
old_swh_anchor_id = deposit.swh_anchor_id | old_swh_anchor_id = deposit.swh_anchor_id | ||||
old_swh_anchor_id_context = deposit.swh_anchor_id_context | old_swh_anchor_id_context = deposit.swh_anchor_id_context | ||||
# Update | # Update | ||||
deposit.swh_id_context = persistent_identifier( | deposit.swh_id_context = swhid( | ||||
DIRECTORY, | DIRECTORY, | ||||
dir_id, | dir_id, | ||||
metadata={ | metadata={ | ||||
"origin": origin, | "origin": origin, | ||||
"visit": persistent_identifier(SNAPSHOT, snp_id), | "visit": swhid(SNAPSHOT, snp_id), | ||||
"anchor": persistent_identifier(REVISION, rev_id), | "anchor": swhid(REVISION, rev_id), | ||||
"path": "/", | "path": "/", | ||||
}, | }, | ||||
) | ) | ||||
# Ensure only deposit.swh_id_context changed | # Ensure only deposit.swh_id_context changed | ||||
logging.debug("deposit.id: {deposit.id}") | logging.debug("deposit.id: {deposit.id}") | ||||
logging.debug("deposit.swh_id: %s -> %s", old_swh_id, deposit.swh_id) | logging.debug("deposit.swh_id: %s -> %s", old_swh_id, deposit.swh_id) | ||||
assert old_swh_id == deposit.swh_id | assert old_swh_id == deposit.swh_id | ||||
▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines | def migrate_deposit_swhid_context_null(apps, schema_editor): | ||||
storage = get_storage() | storage = get_storage() | ||||
if not storage: | if not storage: | ||||
logging.warning("Nothing to do") | logging.warning("Nothing to do") | ||||
return None | return None | ||||
Deposit = apps.get_model("deposit", "Deposit") | Deposit = apps.get_model("deposit", "Deposit") | ||||
for deposit in Deposit.objects.filter( | for deposit in Deposit.objects.filter( | ||||
status=DEPOSIT_STATUS_LOAD_SUCCESS, swh_id_context__isnull=True | status=DEPOSIT_STATUS_LOAD_SUCCESS, swh_id_context__isnull=True | ||||
): | ): | ||||
obj_rev = parse_persistent_identifier(deposit.swh_id) | obj_rev = parse_swhid(deposit.swh_id) | ||||
if obj_rev.object_type == DIRECTORY: | if obj_rev.object_type == DIRECTORY: | ||||
# Assuming the migration is already done for that deposit | # Assuming the migration is already done for that deposit | ||||
logger.warning( | logger.warning( | ||||
"Deposit id %s: Migration already done, skipping", deposit.id | "Deposit id %s: Migration already done, skipping", deposit.id | ||||
) | ) | ||||
continue | continue | ||||
# Ensuring Migration not done | # Ensuring Migration not done | ||||
Show All 34 Lines | ): | ||||
logger.warning( | logger.warning( | ||||
"Deposit id %s: Snapshot targeting revision %s not found!", | "Deposit id %s: Snapshot targeting revision %s not found!", | ||||
deposit.id, | deposit.id, | ||||
rev_id, | rev_id, | ||||
) | ) | ||||
continue | continue | ||||
# New SWHIDs ids | # New SWHIDs ids | ||||
deposit.swh_id = persistent_identifier(DIRECTORY, dir_id) | deposit.swh_id = swhid(DIRECTORY, dir_id) | ||||
deposit.swh_id_context = persistent_identifier( | deposit.swh_id_context = swhid( | ||||
DIRECTORY, | DIRECTORY, | ||||
dir_id, | dir_id, | ||||
metadata={ | metadata={ | ||||
"origin": origin, | "origin": origin, | ||||
"visit": persistent_identifier(SNAPSHOT, snp_id), | "visit": swhid(SNAPSHOT, snp_id), | ||||
"anchor": persistent_identifier(REVISION, rev_id), | "anchor": swhid(REVISION, rev_id), | ||||
"path": "/", | "path": "/", | ||||
}, | }, | ||||
) | ) | ||||
# Realign the remaining deposit SWHIDs fields | # Realign the remaining deposit SWHIDs fields | ||||
deposit.swh_anchor_id = persistent_identifier(REVISION, rev_id) | deposit.swh_anchor_id = swhid(REVISION, rev_id) | ||||
deposit.swh_anchor_id_context = persistent_identifier( | deposit.swh_anchor_id_context = swhid( | ||||
REVISION, rev_id, metadata={"origin": origin,} | REVISION, rev_id, metadata={"origin": origin,} | ||||
) | ) | ||||
# Ensure only deposit.swh_id_context changed | # Ensure only deposit.swh_id_context changed | ||||
logging.debug("deposit.id: {deposit.id}") | logging.debug("deposit.id: {deposit.id}") | ||||
logging.debug("deposit.swh_id: %s -> %s", old_swh_id, deposit.swh_id) | logging.debug("deposit.swh_id: %s -> %s", old_swh_id, deposit.swh_id) | ||||
assert old_swh_id != deposit.swh_id | assert old_swh_id != deposit.swh_id | ||||
ardumont: That's a migration script, so, pffffiou, nothing to do ;)
Wondering how to exclude those from the coverage now, though.
[Not Done] vlorentz: IMO, migration scripts should be tested too.
[Done] ardumont: OK then, but from now on ;)
It's been deployed already.
Although, wait, this one got tested!
[Done] ardumont: > Although, wait this one got tested!
Nope, it did not in the end ¯\_(ツ)_/¯...
It got battle… (remainder of this sentence is truncated in the extracted page)
But yeah, OK, will do the necessary work in the future ;)
[1] D3153
logging.debug( | logging.debug( | ||||
"deposit.swh_id_context: %s -> %s", | "deposit.swh_id_context: %s -> %s", | ||||
old_swh_id_context, | old_swh_id_context, | ||||
deposit.swh_id_context, | deposit.swh_id_context, | ||||
) | ) | ||||
assert old_swh_id_context != deposit.swh_id_context | assert old_swh_id_context != deposit.swh_id_context | ||||
assert deposit.swh_id_context is not None | assert deposit.swh_id_context is not None | ||||
logging.debug( | logging.debug( | ||||
Show All 30 Lines |
That's a migration script so, pffffiou nothing to do ;)
Wondering how to exclude those from the coverage now though.