# -*- coding: utf-8 -*-
# Copyright (C) 2020  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from __future__ import unicode_literals

import logging
import os
from typing import Any, Dict, Optional, Tuple

from django.db import migrations

from swh.core import config
from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS
from swh.model.hashutil import hash_to_bytes, hash_to_hex
from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, parse_swhid, swhid
from swh.storage import get_storage as get_storage_client
from swh.storage.algos.snapshot import snapshot_id_get_from_revision

# Provider URL used by resolve_origin() as part of the lookup key for the
# special-cased origins of some initial deposits.
SWH_PROVIDER_URL = "https://www.softwareheritage.org"


# Module-level logger for this migration.
logger = logging.getLogger(__name__)


# Storage client cached by get_storage(); stays None until a client has been
# instantiated.
swh_storage = None


def get_storage() -> Optional[Any]:
    """Return the storage client, instantiating it on first use.

    The client is only built when DJANGO_SETTINGS_MODULE designates the
    production settings; for any other settings module, None is returned.
    Once built, the client is kept in the module-level ``swh_storage`` and
    reused by later calls.

    Returns:
        The storage client, or None outside of the production settings.

    Raises:
        ValueError: in production, when SWH_CONFIG_FILENAME is unset, when the
            file it designates does not exist, when the loaded configuration
            is empty, or when it has no 'storage' entry.

    """
    global swh_storage

    django_settings = os.environ.get("DJANGO_SETTINGS_MODULE")
    if django_settings != "swh.deposit.settings.production":
        # Bypass for now
        return None

    # Reuse the client instantiated by a previous call
    if swh_storage:
        return swh_storage

    config_path = os.environ.get("SWH_CONFIG_FILENAME")
    if not config_path:
        raise ValueError(
            "Production: SWH_CONFIG_FILENAME must be set to the"
            " configuration file needed!"
        )
    if not os.path.exists(config_path):
        raise ValueError(
            "Production: configuration file %s does not exist!" % (config_path,)
        )

    loaded_config = config.load_named_config(config_path)
    if not loaded_config:
        raise ValueError(
            "Production: configuration %s does not exist." % (config_path,)
        )

    storage_section = loaded_config.get("storage")
    if not storage_section:
        raise ValueError(
            "Production: invalid configuration; missing 'storage' config entry."
        )

    swh_storage = get_storage_client(**storage_section)
    return swh_storage


def migrate_deposit_swhid_context_not_null(apps, schema_editor):
    """Migrate deposit SWHIDs to the new format.

    Only deposits with status done and swh_id_context not null are concerned.
    For each of them, swh_id_context is rewritten as the directory SWHID
    qualified with the origin, the visit (snapshot), the anchor (revision) and
    the path. The other SWHID fields of the deposit are left untouched.

    A deposit is skipped with a warning when:

    - its swh_id_context holds anything else than the sole "origin" qualifier
      (the migration is assumed to be already done),
    - its origin is not found in the storage,
    - no snapshot targeting its revision is found.

    Nothing is done when get_storage() returns no storage client.

    Args:
        apps: application registry provided by the migration runner, used to
            look up the Deposit model
        schema_editor: provided by the migration runner, unused

    """
    storage = get_storage()
    if not storage:
        logger.warning("Nothing to do")
        return None

    Deposit = apps.get_model("deposit", "Deposit")
    for deposit in Deposit.objects.filter(
        status=DEPOSIT_STATUS_LOAD_SUCCESS, swh_id_context__isnull=False
    ):
        obj_dir = parse_swhid(deposit.swh_id_context)
        assert obj_dir.object_type == DIRECTORY

        obj_rev = parse_swhid(deposit.swh_anchor_id)
        assert obj_rev.object_type == REVISION

        if set(obj_dir.metadata.keys()) != {"origin"}:
            # Assuming the migration is already done for that deposit
            logger.warning(
                "Deposit id %s: Migration already done, skipping", deposit.id
            )
            continue

        # Starting migration

        dir_id = obj_dir.object_id
        origin = obj_dir.metadata["origin"]

        check_origin = storage.origin_get([origin])[0]
        if not check_origin:
            logger.warning("Deposit id %s: Origin %s not found!", deposit.id, origin)
            continue

        rev_id = obj_rev.object_id
        # Find the snapshot targeting the revision
        snp_id = snapshot_id_get_from_revision(storage, origin, hash_to_bytes(rev_id))
        if snp_id is None:
            logger.warning(
                "Deposit id %s: Snapshot targeting revision %s not found!",
                deposit.id,
                rev_id,
            )
            continue

        # Reference the old values to do some checks later
        old_swh_id = deposit.swh_id
        old_swh_id_context = deposit.swh_id_context
        old_swh_anchor_id = deposit.swh_anchor_id
        old_swh_anchor_id_context = deposit.swh_anchor_id_context

        # Update
        deposit.swh_id_context = swhid(
            DIRECTORY,
            dir_id,
            metadata={
                "origin": origin,
                "visit": swhid(SNAPSHOT, snp_id.hex()),
                "anchor": swhid(REVISION, rev_id),
                "path": "/",
            },
        )

        # Ensure only deposit.swh_id_context changed
        logger.debug("deposit.id: %s", deposit.id)
        logger.debug("deposit.swh_id: %s -> %s", old_swh_id, deposit.swh_id)
        assert old_swh_id == deposit.swh_id
        logger.debug(
            "deposit.swh_id_context: %s -> %s",
            old_swh_id_context,
            deposit.swh_id_context,
        )
        assert old_swh_id_context != deposit.swh_id_context
        logger.debug(
            "deposit.swh_anchor_id: %s -> %s", old_swh_anchor_id, deposit.swh_anchor_id
        )
        assert old_swh_anchor_id == deposit.swh_anchor_id
        logger.debug(
            "deposit.swh_anchor_id_context: %s -> %s",
            old_swh_anchor_id_context,
            deposit.swh_anchor_id_context,
        )
        assert old_swh_anchor_id_context == deposit.swh_anchor_id_context

        # Commit
        deposit.save()


def resolve_origin(deposit_id: int, provider_url: str, external_id: str) -> str:
    """Compute the origin url of a deposit.

    The lookup goes through three steps, the first match wins:

    1. a table of known inconsistent deposits (leftovers from old testing),
       keyed by deposit id and external id;
    2. a table of simpler special cases (mostly the initial deposits), keyed
       by provider url and external id;
    3. the default: provider url (without trailing slashes) joined to the
       external id with a slash.

    Args:
        deposit_id: identifier of the deposit
        provider_url: provider url of the deposit's client
        external_id: external identifier of the deposit

    Returns:
        The origin url for that deposit

    """
    # (deposit id, external id) -> origin, for the inconsistent old deposits
    legacy_origins: Dict[Tuple[int, str], str] = {
        (75, "hal-01588781"): (
            "https://inria.halpreprod.archives-ouvertes.fr/hal-01588781"
        ),
        (76, "hal-01588782"): (
            "https://inria.halpreprod.archives-ouvertes.fr/hal-01588782"
        ),
        (87, "hal-01588927"): (
            "https://inria.halpreprod.archives-ouvertes.fr/hal-01588927"
        ),
        (88, "hal-01588928"): (
            "https://inria.halpreprod.archives-ouvertes.fr/hal-01588928"
        ),
        (89, "hal-01588935"): "https://hal-preprod.archives-ouvertes.fr/hal-01588935",
        (90, "hal-01588942"): (
            "https://inria.halpreprod.archives-ouvertes.fr/hal-01588942"
        ),
        (143, "hal-01592430"): "https://hal-preprod.archives-ouvertes.fr/hal-01592430",
    }
    legacy_key = (deposit_id, external_id)
    if legacy_key in legacy_origins:
        return legacy_origins[legacy_key]

    # (provider url, external id) -> origin, mostly for the initial deposits
    initial_origins = {
        (SWH_PROVIDER_URL, "je-suis-gpl"): (
            "https://forge.softwareheritage.org/source/jesuisgpl/"
        ),
        (SWH_PROVIDER_URL, "external-id"): (
            "https://hal.archives-ouvertes.fr/external-id"
        ),
    }
    default_origin = f"{provider_url.rstrip('/')}/{external_id}"
    return initial_origins.get((provider_url, external_id), default_origin)


def migrate_deposit_swhid_context_null(apps, schema_editor):
    """Migrate deposit SWHIDs to the new format.

    Migrate deposits whose swh_id_context is not set (initial deposits not
    migrated at the time). Only deposits with status done and swh_id_context
    null are concerned.

    Note: Those deposits have their swh_id being the SWHID of the revision! So
    all four SWHID fields are realigned here:

    - swh_id becomes the SWHID of the revision's directory,
    - swh_id_context becomes that directory SWHID qualified with the origin,
      the visit (snapshot), the anchor (revision) and the path,
    - swh_anchor_id becomes the revision SWHID (the former swh_id),
    - swh_anchor_id_context becomes the revision SWHID qualified with the
      origin.

    A deposit is skipped with a warning when:

    - its swh_id already is a directory SWHID (the migration is assumed to be
      already done),
    - its revision is not found in the storage,
    - its origin (as computed by resolve_origin) is not found in the storage,
    - no snapshot targeting its revision is found.

    Nothing is done when get_storage() returns no storage client.

    Args:
        apps: application registry provided by the migration runner, used to
            look up the Deposit model
        schema_editor: provided by the migration runner, unused

    """
    storage = get_storage()
    if not storage:
        logger.warning("Nothing to do")
        return None
    Deposit = apps.get_model("deposit", "Deposit")
    for deposit in Deposit.objects.filter(
        status=DEPOSIT_STATUS_LOAD_SUCCESS, swh_id_context__isnull=True
    ):
        obj_rev = parse_swhid(deposit.swh_id)
        if obj_rev.object_type == DIRECTORY:
            # Assuming the migration is already done for that deposit
            logger.warning(
                "Deposit id %s: Migration already done, skipping", deposit.id
            )
            continue

        # Ensuring Migration not done
        assert obj_rev.object_type == REVISION

        assert deposit.swh_id is not None
        assert deposit.swh_id_context is None
        assert deposit.swh_anchor_id is None
        assert deposit.swh_anchor_id_context is None

        rev_id = obj_rev.object_id
        rev_id_bytes = hash_to_bytes(rev_id)
        revision = storage.revision_get([rev_id_bytes])[0]
        if not revision:
            logger.warning("Deposit id %s: Revision %s not found!", deposit.id, rev_id)
            continue

        provider_url = deposit.client.provider_url
        external_id = deposit.external_id

        origin = resolve_origin(deposit.id, provider_url, external_id)
        check_origin = storage.origin_get([origin])[0]
        if not check_origin:
            logger.warning("Deposit id %s: Origin %s not found!", deposit.id, origin)
            continue

        dir_id = hash_to_hex(revision["directory"])

        # Reference the old values to do some checks later
        old_swh_id = deposit.swh_id
        old_swh_id_context = deposit.swh_id_context
        old_swh_anchor_id = deposit.swh_anchor_id
        old_swh_anchor_id_context = deposit.swh_anchor_id_context

        # retrieve the snapshot from the archive
        snp_id = snapshot_id_get_from_revision(storage, origin, rev_id_bytes)
        if snp_id is None:
            logger.warning(
                "Deposit id %s: Snapshot targeting revision %s not found!",
                deposit.id,
                rev_id,
            )
            continue

        # New SWHIDs ids
        deposit.swh_id = swhid(DIRECTORY, dir_id)
        deposit.swh_id_context = swhid(
            DIRECTORY,
            dir_id,
            metadata={
                "origin": origin,
                "visit": swhid(SNAPSHOT, snp_id.hex()),
                "anchor": swhid(REVISION, rev_id),
                "path": "/",
            },
        )
        # Realign the remaining deposit SWHIDs fields
        deposit.swh_anchor_id = swhid(REVISION, rev_id)
        deposit.swh_anchor_id_context = swhid(
            REVISION, rev_id, metadata={"origin": origin}
        )

        # Ensure every SWHID field got realigned as expected before saving
        logger.debug("deposit.id: %s", deposit.id)
        logger.debug("deposit.swh_id: %s -> %s", old_swh_id, deposit.swh_id)

        assert old_swh_id != deposit.swh_id
        logger.debug(
            "deposit.swh_id_context: %s -> %s",
            old_swh_id_context,
            deposit.swh_id_context,
        )
        assert old_swh_id_context != deposit.swh_id_context
        assert deposit.swh_id_context is not None
        logger.debug(
            "deposit.swh_anchor_id: %s -> %s", old_swh_anchor_id, deposit.swh_anchor_id
        )
        # The former swh_id was the revision SWHID, which is now the anchor
        assert deposit.swh_anchor_id == old_swh_id
        assert deposit.swh_anchor_id is not None
        logger.debug(
            "deposit.swh_anchor_id_context: %s -> %s",
            old_swh_anchor_id_context,
            deposit.swh_anchor_id_context,
        )
        assert deposit.swh_anchor_id_context is not None

        deposit.save()


class Migration(migrations.Migration):
    """Data migration aligning the deposit SWHIDs to the new format.

    Runs the migration of the deposits with a swh_id_context first, then the
    migration of the deposits without one.

    """

    dependencies = [("deposit", "0017_auto_20190925_0906")]

    # Each step is declared with a no-op reverse so the migration stays
    # possibly reversible
    # https://docs.djangoproject.com/en/3.0/ref/migration-operations/#django.db.migrations.operations.RunPython.noop  # noqa
    operations = [
        migrations.RunPython(forward_step, reverse_code=migrations.RunPython.noop)
        for forward_step in (
            migrate_deposit_swhid_context_not_null,
            migrate_deposit_swhid_context_null,
        )
    ]
