# Copyright (C) 2020  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

# flake8: noqa
# because of long lines

import copy
import datetime
import json
from unittest.mock import MagicMock, Mock, call

from swh.model.model import (
    MetadataAuthority,
    MetadataAuthorityType,
    MetadataFetcher,
    Origin,
    RawExtrinsicMetadata,
)
from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID
from swh.storage.migrate_extrinsic_metadata import (
    DEPOSIT_COLS,
    cran_package_from_url,
    handle_row,
)

# Fetcher expected on every RawExtrinsicMetadata object asserted in this module.
FETCHER = MetadataFetcher(
    name="migrate-extrinsic-metadata-from-revisions",
    version="0.0.1",
)

# Authority expected on the "original-artifacts-json" metadata objects.
SWH_AUTHORITY = MetadataAuthority(
    metadata={},
    url="https://softwareheritage.org/",
    type=MetadataAuthorityType.REGISTRY,
)

# Deposit-client authorities; their URLs match the
# "deposit_client.provider_url" values used in the mocked deposit rows.
_DEPOSIT_CLIENT = MetadataAuthorityType.DEPOSIT_CLIENT
SWH_DEPOSIT_AUTHORITY = MetadataAuthority(
    metadata={}, url="https://www.softwareheritage.org", type=_DEPOSIT_CLIENT,
)
HAL_AUTHORITY = MetadataAuthority(
    metadata={}, url="https://hal.archives-ouvertes.fr/", type=_DEPOSIT_CLIENT,
)
INTEL_AUTHORITY = MetadataAuthority(
    metadata={}, url="https://software.intel.com", type=_DEPOSIT_CLIENT,
)

# Directory id shared by all revision rows, and the matching SWHID that the
# tests expect as the target of every metadata object.
DIRECTORY_ID = 20 * b"a"
DIRECTORY_SWHID = ExtendedSWHID(
    object_id=DIRECTORY_ID, object_type=ExtendedObjectType.DIRECTORY
)


def get_mock_deposit_cur(row_dicts):
    """Return a mock cursor that yields ``row_dicts`` as tuples, exactly once.

    Each dict is flattened into a tuple whose values follow the order of
    ``DEPOSIT_COLS``; keys not listed in ``DEPOSIT_COLS`` are ignored and a
    missing one raises ``KeyError``.
    """
    cursor = MagicMock()
    tuples = []
    for row_dict in row_dicts:
        tuples.append(tuple(row_dict[column] for column in DEPOSIT_COLS))
    # A one-element side_effect list: only the first iteration over the
    # cursor is served.
    cursor.__iter__.side_effect = [iter(tuples)]
    return cursor


def test_deposit_1():
    """Deposit revision with a provider and xmlns, metadata stored twice.

    The deposited metadata is present both at the root of the revision's
    metadata dict and in metadata->extrinsic->raw->origin_metadata.  The test
    expects an origin lookup, then the deposit metadata, then the original
    artifacts, each added through ``raw_extrinsic_metadata_add``.
    """
    utc = datetime.timezone.utc
    revision_hex = "022310df16fd9e4d4f81fe36a142e82db977c01d"
    revision_swhid = f"swh:1:rev:{revision_hex}"
    origin_url = (
        "https://www.softwareheritage.org/check-deposit-2020-03-11T11:07:18.424476"
    )

    extrinsic_metadata = {
        "title": "Je suis GPL",
        "@xmlns": "http://www.w3.org/2005/Atom",
        "client": "swh",
        "codemeta:url": "https://forge.softwareheritage.org/source/jesuisgpl/",
        "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
        "codemeta:author": {
            "codemeta:name": "Stefano Zacchiroli",
            "codemeta:jobTitle": "Maintainer",
        },
        "codemeta:license": {
            "codemeta:url": "https://spdx.org/licenses/GPL-3.0-or-later.html",
            "codemeta:name": "GNU General Public License v3.0 or later",
        },
        # ...
    }
    original_artifacts = [
        {
            "length": 80880,
            "filename": "archive.zip",
            "checksums": {
                "sha1": "bad32a47a359e0e16ebdca2ad2dc6a771dac8f71",
                "sha256": "182b7ee3b7b5b550e83d3bcfed029bb2f625ee760ebfe9557d5fd072bd4e22e4",
            },
        }
    ]

    revision_metadata = {
        "client": "swh",
        "extrinsic": {
            "raw": {
                "origin": {"url": origin_url, "type": "deposit"},
                "branch_name": "master",
                "origin_metadata": {
                    "tool": {
                        "name": "swh-deposit",
                        "version": "0.0.1",
                        "configuration": {"sword_version": 2},
                    },
                    "metadata": extrinsic_metadata,
                },
            },
            "when": "2020-03-11T11:11:36.336283+00:00",
            "provider": "https://deposit.softwareheritage.org/1/private/467/meta/",
        },
        "original_artifact": original_artifacts,
    }
    # Second copy of the deposited metadata, at the root of the dict.
    revision_metadata.update(extrinsic_metadata)

    commit_date = datetime.datetime(2018, 1, 5, tzinfo=utc)
    row = {
        "id": bytes.fromhex(revision_hex),
        "directory": DIRECTORY_ID,
        "date": commit_date,
        "committer_date": commit_date,
        "type": "tar",
        "message": b"swh: Deposit 467 in collection swh",
        "metadata": revision_metadata,
    }

    swhid = ";".join(
        [
            "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
            f"origin={origin_url}",
            "visit=swh:1:snp:14433c19dbb03ad57c86b58b53a800d6a0e32dd3",
            f"anchor={revision_swhid}",
            "path=/",
        ]
    )

    # Fields identical in every deposit-request row of deposit 467.
    shared_deposit_fields = {
        "deposit.id": 467,
        "deposit.external_id": "check-deposit-2020-03-11T11:07:18.424476",
        "deposit.swhid_context": swhid,
        "deposit.status": "success",
        "deposit_client.provider_url": "https://www.softwareheritage.org",
        "deposit_collection.name": "swh",
        "auth_user.username": "swh",
    }
    metadata_request_date = datetime.datetime(
        2020, 3, 11, 11, 7, 18, 688410, tzinfo=utc
    )
    deposit_rows = [
        {
            **shared_deposit_fields,
            "deposit_request.metadata": extrinsic_metadata,
            "deposit_request.date": metadata_request_date,
        },
        {
            **shared_deposit_fields,
            "deposit_request.metadata": None,
            "deposit_request.date": datetime.datetime(
                2020, 3, 11, 11, 7, 18, 669428, tzinfo=utc
            ),
        },
    ]

    def fake_origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(deposit_rows)

    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    def expected_add(discovery_date, authority, metadata_format, payload):
        """Expected ``raw_extrinsic_metadata_add`` call for one object."""
        return call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=discovery_date,
                    authority=authority,
                    fetcher=FETCHER,
                    format=metadata_format,
                    metadata=json.dumps(payload).encode(),
                    origin=origin_url,
                    revision=CoreSWHID.from_string(revision_swhid),
                )
            ]
        )

    expected_calls = [
        call.origin_get([origin_url]),
        expected_add(
            metadata_request_date,
            SWH_DEPOSIT_AUTHORITY,
            "sword-v2-atom-codemeta-v2-in-json",
            extrinsic_metadata,
        ),
        expected_add(
            datetime.datetime(2020, 3, 11, 11, 11, 36, 336283, tzinfo=utc),
            SWH_AUTHORITY,
            "original-artifacts-json",
            original_artifacts,
        ),
    ]
    assert storage.method_calls == expected_calls


def test_deposit_2_without_xmlns():
    """Deposit revision with a provider, no xmlns, metadata stored once.

    The deposited metadata is only found in
    metadata->extrinsic->raw->origin_metadata and its keys carry expanded
    ``{namespace}name`` prefixes instead of ``@xmlns`` declarations.  The
    deposit metadata is expected with the "-with-expanded-namespaces" format.
    """
    utc = datetime.timezone.utc
    revision_hex = "0116cab71964d59c8570b4c5729b28bdd63c9b46"
    revision_swhid = f"swh:1:rev:{revision_hex}"
    origin_url = "https://hal.archives-ouvertes.fr/hal-01243573"

    extrinsic_metadata = {
        "{http://www.w3.org/2005/Atom}id": "hal-01243573",
        "{http://www.w3.org/2005/Atom}author": {
            "{http://www.w3.org/2005/Atom}name": "HAL",
            "{http://www.w3.org/2005/Atom}email": "hal@ccsd.cnrs.fr",
        },
        "{http://www.w3.org/2005/Atom}client": "hal",
        "{http://www.w3.org/2005/Atom}external_identifier": "hal-01243573",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}url": "https://hal-test.archives-ouvertes.fr/hal-01243573",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}name": "The assignment problem",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author": {
            "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}name": "Morane Gruenpeter"
        },
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}version": 1,
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}identifier": "10.5281/zenodo.438684",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}dateCreated": "2017-11-16T14:54:23+01:00",
    }
    original_artifacts = [
        {
            "length": 208357,
            "filename": "archive.zip",
            "checksums": {
                "sha1": "fa0aec08e8a44ea144dba7ce366c8b5d66c14453",
                "sha256": "f53c05fe947e88ce83751a93bd522b1f88478ea2e7b984c07fc7a7c68128bf87",
            },
        }
    ]

    revision_metadata = {
        "extrinsic": {
            "raw": {
                "origin": {"url": origin_url, "type": "deposit"},
                "origin_metadata": {
                    "tool": {
                        "name": "swh-deposit",
                        "version": "0.0.1",
                        "configuration": {"sword_version": 2},
                    },
                    "metadata": extrinsic_metadata,
                    "provider": {
                        "metadata": {},
                        "provider_url": "https://hal.archives-ouvertes.fr/",
                        "provider_name": "hal",
                        "provider_type": "deposit_client",
                    },
                },
            },
            "when": "2020-05-15T14:27:21.462270+00:00",
            "provider": "https://deposit.softwareheritage.org/1/private/82/meta/",
        },
        "original_artifact": original_artifacts,
    }

    commit_date = datetime.datetime(2018, 1, 17, 12, 54, 0, 723882, tzinfo=utc)
    row = {
        "id": bytes.fromhex(revision_hex),
        "directory": DIRECTORY_ID,
        "date": commit_date,
        "committer_date": commit_date,
        "type": "tar",
        "message": b"hal: Deposit 82 in collection hal",
        "metadata": revision_metadata,
    }

    swhid = ";".join(
        [
            "swh:1:dir:e04b2a7b8a8838da0693e9fd992a10d6fd211b50",
            f"origin={origin_url}",
            "visit=swh:1:snp:abc9ae594245a740235b6c039f044352a5f723ec",
            f"anchor={revision_swhid}",
            "path=/",
        ]
    )

    # Fields identical in every deposit-request row of deposit 82.
    shared_deposit_fields = {
        "deposit.id": 82,
        "deposit.external_id": "hal-01243573",
        "deposit.swhid_context": swhid,
        "deposit.status": "success",
        "deposit_client.provider_url": "https://hal.archives-ouvertes.fr/",
        "deposit_collection.name": "hal",
        "auth_user.username": "hal",
    }
    metadata_request_date = datetime.datetime(
        2018, 1, 17, 12, 54, 0, 413748, tzinfo=utc
    )
    deposit_rows = [
        {
            **shared_deposit_fields,
            "deposit_request.metadata": None,
            "deposit_request.date": datetime.datetime(
                2018, 1, 17, 12, 54, 1, 533972, tzinfo=utc
            ),
        },
        {
            **shared_deposit_fields,
            "deposit_request.metadata": extrinsic_metadata,
            "deposit_request.date": metadata_request_date,
        },
    ]

    def fake_origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(deposit_rows)

    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    def expected_add(discovery_date, authority, metadata_format, payload):
        """Expected ``raw_extrinsic_metadata_add`` call for one object."""
        return call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=discovery_date,
                    authority=authority,
                    fetcher=FETCHER,
                    format=metadata_format,
                    metadata=json.dumps(payload).encode(),
                    origin=origin_url,
                    revision=CoreSWHID.from_string(revision_swhid),
                )
            ]
        )

    expected_calls = [
        call.origin_get([origin_url]),
        expected_add(
            metadata_request_date,
            HAL_AUTHORITY,
            "sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces",
            extrinsic_metadata,
        ),
        expected_add(
            datetime.datetime(2020, 5, 15, 14, 27, 21, 462270, tzinfo=utc),
            SWH_AUTHORITY,
            "original-artifacts-json",
            original_artifacts,
        ),
    ]
    assert storage.method_calls == expected_calls


def test_deposit_2_with_xmlns():
    """Deposit revision with a provider and xmlns, metadata stored once.

    The deposited metadata is only found in
    metadata->extrinsic->raw->origin_metadata and declares its namespaces
    with ``@xmlns`` keys.  The test expects an origin lookup, then the deposit
    metadata, then the original artifacts.
    """
    utc = datetime.timezone.utc
    revision_hex = "0122966e509317aece6a41d0f088da733cc09d0f"
    revision_swhid = f"swh:1:rev:{revision_hex}"
    origin_url = (
        "https://www.softwareheritage.org/check-deposit-2020-06-26T13:50:07.564420"
    )

    extrinsic_metadata = {
        "title": "Je suis GPL",
        "@xmlns": "http://www.w3.org/2005/Atom",
        "client": "swh",
        "codemeta:url": "https://forge.softwareheritage.org/source/jesuisgpl/",
        "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
        "codemeta:author": {
            "codemeta:name": "Stefano Zacchiroli",
            "codemeta:jobTitle": "Maintainer",
        },
        "codemeta:license": {
            "codemeta:url": "https://spdx.org/licenses/GPL-3.0-or-later.html",
            "codemeta:name": "GNU General Public License v3.0 or later",
        },
        "external_identifier": "je-suis-gpl",
        "codemeta:dateCreated": "2018-01-05",
    }
    original_artifacts = [
        {
            "length": 80880,
            "filename": "archive.zip",
            "checksums": {
                "sha1": "bad32a47a359e0e16ebdca2ad2dc6a771dac8f71",
                "sha256": "182b7ee3b7b5b550e83d3bcfed029bb2f625ee760ebfe9557d5fd072bd4e22e4",
            },
        }
    ]

    revision_metadata = {
        "extrinsic": {
            "raw": {
                "origin": {"url": origin_url, "type": "deposit"},
                "origin_metadata": {
                    "tool": {
                        "name": "swh-deposit",
                        "version": "0.0.1",
                        "configuration": {"sword_version": 2},
                    },
                    "metadata": extrinsic_metadata,
                    "provider": {
                        "metadata": {},
                        "provider_url": "https://www.softwareheritage.org",
                        "provider_name": "swh",
                        "provider_type": "deposit_client",
                    },
                },
            },
            "when": "2020-06-26T13:50:22.640625+00:00",
            "provider": "https://deposit.softwareheritage.org/1/private/687/meta/",
        },
        "original_artifact": original_artifacts,
    }

    commit_date = datetime.datetime(2018, 1, 5, tzinfo=utc)
    row = {
        "id": bytes.fromhex(revision_hex),
        "directory": DIRECTORY_ID,
        "date": commit_date,
        "committer_date": commit_date,
        "type": "tar",
        "message": b"swh: Deposit 687 in collection swh",
        "metadata": revision_metadata,
    }

    swhid = ";".join(
        [
            "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
            f"origin={origin_url}",
            "visit=swh:1:snp:8fd469e280fb0724175c64906627f619143d5bdb",
            f"anchor={revision_swhid}",
            "path=/",
        ]
    )

    # Fields identical in every deposit-request row of deposit 687.
    shared_deposit_fields = {
        "deposit.id": 687,
        "deposit.external_id": "check-deposit-2020-06-26T13:50:07.564420",
        "deposit.swhid_context": swhid,
        "deposit.status": "success",
        "deposit_client.provider_url": "https://www.softwareheritage.org",
        "deposit_collection.name": "swh",
        "auth_user.username": "swh",
    }
    metadata_request_date = datetime.datetime(
        2020, 6, 26, 13, 50, 8, 216113, tzinfo=utc
    )
    deposit_rows = [
        {
            **shared_deposit_fields,
            "deposit_request.metadata": extrinsic_metadata,
            "deposit_request.date": metadata_request_date,
        },
        {
            **shared_deposit_fields,
            "deposit_request.metadata": None,
            "deposit_request.date": datetime.datetime(
                2020, 6, 26, 13, 50, 8, 150498, tzinfo=utc
            ),
        },
    ]

    def fake_origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(deposit_rows)

    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    def expected_add(discovery_date, authority, metadata_format, payload):
        """Expected ``raw_extrinsic_metadata_add`` call for one object."""
        return call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=discovery_date,
                    authority=authority,
                    fetcher=FETCHER,
                    format=metadata_format,
                    metadata=json.dumps(payload).encode(),
                    origin=origin_url,
                    revision=CoreSWHID.from_string(revision_swhid),
                )
            ]
        )

    expected_calls = [
        call.origin_get([origin_url]),
        expected_add(
            metadata_request_date,
            SWH_DEPOSIT_AUTHORITY,
            "sword-v2-atom-codemeta-v2-in-json",
            extrinsic_metadata,
        ),
        expected_add(
            datetime.datetime(2020, 6, 26, 13, 50, 22, 640625, tzinfo=utc),
            SWH_AUTHORITY,
            "original-artifacts-json",
            original_artifacts,
        ),
    ]
    assert storage.method_calls == expected_calls


def test_deposit_2_with_json_in_json_and_no_xmlns():
    """Deposit revision whose raw metadata is a JSON string, without xmlns.

    Covers the formats introduced in https://forge.softwareheritage.org/D4105
    (the raw metadata is itself JSON-encoded inside the metadata JSON tree)
    and https://forge.softwareheritage.org/D4065 (the @xmlns declarations are
    stripped before being sent to the deposit DB).
    """
    utc = datetime.timezone.utc
    revision_hex = "4a9d637ba507a2b93365250428e6e3f021f194d0"
    revision_swhid = f"swh:1:rev:{revision_hex}"
    origin_url = "https://hal.archives-ouvertes.fr/hal-02960679"

    extrinsic_metadata = {
        "id": "hal-02960679",
        "author": {"name": "HAL", "email": "hal@ccsd.cnrs.fr"},
        "client": "hal",
        "codemeta:url": "https://hal.archives-ouvertes.fr/hal-02960679",
        "codemeta:name": "Compressive Spectral Clustering Toolbox",
        "codemeta:author": [
            {"codemeta:name": "Nicolas Tremblay", "codemeta:affiliation": "PANAMA"},
            {"codemeta:name": "Gilles Puy", "codemeta:affiliation": "PANAMA"},
            {"codemeta:name": "R{\\'e}mi Gribonval", "codemeta:affiliation": "PANAMA"},
            {"codemeta:name": "Pierre Vandergheynst"},
        ],
        # ...
    }
    original_artifacts = [
        {
            "url": "https://deposit.softwareheritage.org/1/private/1037/raw/",
            "length": 4546913,
            "filename": "archive.zip",
            "checksums": {
                "sha1": "01a0069c626a383de9a17ace40ecfd588e5c4f26",
                "sha256": "c780a6de91286c70ceecc69fe0c6d201d3fe944aa89e193f3a89ae85dc25c3b1",
            },
        }
    ]

    revision_metadata = {
        "extrinsic": {
            "raw": {
                "origin": {"url": origin_url, "type": "deposit"},
                "origin_metadata": {
                    "tool": {
                        "name": "swh-deposit",
                        "version": "0.2.0",
                        "configuration": {"sword_version": "2"},
                    },
                    # The metadata is stored as a JSON string in this format.
                    "metadata": json.dumps(extrinsic_metadata),
                    "provider": {
                        "metadata": {},
                        "provider_url": "https://hal.archives-ouvertes.fr/",
                        "provider_name": "hal",
                        "provider_type": "deposit_client",
                    },
                },
            },
            "when": "2020-10-09T13:38:25.888646+00:00",
            "provider": "https://deposit.softwareheritage.org/1/private/1037/meta/",
        },
        "original_artifact": original_artifacts,
    }

    row = {
        "id": bytes.fromhex(revision_hex),
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(2016, 1, 29, tzinfo=utc),
        "committer_date": datetime.datetime(2020, 10, 8, tzinfo=utc),
        "type": "tar",
        "message": b"hal: Deposit 1037 in collection hal",
        "metadata": revision_metadata,
    }

    swhid = ";".join(
        [
            "swh:1:dir:8bfdf74037ae1c51335995891c6226e0f85e46e2",
            f"origin={origin_url}",
            "visit=swh:1:snp:bc4a2ddf84dd0cc13d74e1970a1471c2574ed6aa",
            f"anchor={revision_swhid}",
            "path=/",
        ]
    )

    # Fields identical in every deposit-request row of deposit 1037.
    shared_deposit_fields = {
        "deposit.id": 1037,
        "deposit.external_id": "hal-02960679",
        "deposit.swhid_context": swhid,
        "deposit.status": "done",
        "deposit_client.provider_url": "https://hal.archives-ouvertes.fr/",
        "deposit_collection.name": "hal",
        "auth_user.username": "hal",
    }
    metadata_request_date = datetime.datetime(
        2020, 10, 9, 13, 38, 7, 394544, tzinfo=utc
    )
    deposit_rows = [
        {
            **shared_deposit_fields,
            "deposit_request.metadata": None,
            "deposit_request.date": datetime.datetime(
                2020, 10, 9, 13, 38, 8, 269611, tzinfo=utc
            ),
        },
        {
            **shared_deposit_fields,
            "deposit_request.metadata": extrinsic_metadata,
            "deposit_request.date": metadata_request_date,
        },
    ]

    def fake_origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(deposit_rows)

    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    def expected_add(discovery_date, authority, metadata_format, payload):
        """Expected ``raw_extrinsic_metadata_add`` call for one object."""
        return call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=discovery_date,
                    authority=authority,
                    fetcher=FETCHER,
                    format=metadata_format,
                    metadata=json.dumps(payload).encode(),
                    origin=origin_url,
                    revision=CoreSWHID.from_string(revision_swhid),
                )
            ]
        )

    expected_calls = [
        call.origin_get([origin_url]),
        expected_add(
            metadata_request_date,
            HAL_AUTHORITY,
            "sword-v2-atom-codemeta-v2-in-json",
            extrinsic_metadata,
        ),
        expected_add(
            datetime.datetime(2020, 10, 9, 13, 38, 25, 888646, tzinfo=utc),
            SWH_AUTHORITY,
            "original-artifacts-json",
            original_artifacts,
        ),
    ]
    assert storage.method_calls == expected_calls


def test_deposit_3_and_wrong_external_id_in_metadata():
    """Deposit revision with root-level metadata and a mismatched external id.

    The revision metadata has no "extrinsic" key: the deposited metadata sits
    at the root of the dict, next to "original_artifact" entries that keep
    their checksums at the top level.  The "external_identifier" in that
    metadata differs from the "deposit.external_id" of the mocked deposit
    rows; the expected origin URL ends with the latter.

    Expected storage calls: an origin lookup, one deposit-metadata object per
    deposit request that carries metadata, then the original artifacts with
    their checksums nested under "checksums" and "name" renamed to "filename".
    """
    utc = datetime.timezone.utc
    revision_hex = "09356053c49ad0f9e62e51c29d3e617c791140df"
    revision_swhid = f"swh:1:rev:{revision_hex}"
    origin_url = "https://software.intel.com/f80482de-90a8-4c32-bce4-6f6918d492ff"

    extrinsic_metadata = {
        "title": "VTune Perf tool",
        "@xmlns": "http://www.w3.org/2005/Atom",
        "client": "swh",
        "codemeta:url": "https://software.intel.com/en-us/vtune",
        "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
        "codemeta:author": {
            "codemeta:name": "VTune developer",
            "codemeta:jobTitle": "Software Engineer",
        },
        "external_identifier": "vtune-perf-tool",
        "codemeta:dateCreated": "2019-05-14",
        "codemeta:description": "Modified version of Linux Perf tool which is used by Intel VTune Amplifier",
    }
    # Artifact description as found in the revision.
    source_original_artifacts = [
        {
            "name": "archive.zip",
            "sha1": "07251dbb1d904d143fd7da9935701f17670d4d9b",
            "length": 4350528,
            "sha256": "1f7d111ac79e468002f3edf4b7b2487538d41f6bea362d49b2eb08a537efafb6",
            "sha1_git": "e2d894efcaad4ff36f09eda3b3c0096416b03429",
            "blake2s256": "e2c08b82efbc361fbb2d28aa8352668cd71217f165f63de16b61ed61ace7509d",
            "archive_type": "zip",
        }
    ]
    # Artifact description expected in the storage call.
    dest_original_artifacts = [
        {
            "length": 4350528,
            "archive_type": "zip",
            "filename": "archive.zip",
            "checksums": {
                "sha1": "07251dbb1d904d143fd7da9935701f17670d4d9b",
                "sha256": "1f7d111ac79e468002f3edf4b7b2487538d41f6bea362d49b2eb08a537efafb6",
                "sha1_git": "e2d894efcaad4ff36f09eda3b3c0096416b03429",
                "blake2s256": "e2c08b82efbc361fbb2d28aa8352668cd71217f165f63de16b61ed61ace7509d",
            },
        }
    ]

    revision_metadata = dict(extrinsic_metadata)
    revision_metadata["original_artifact"] = source_original_artifacts

    commit_date = datetime.datetime(2019, 5, 14, tzinfo=utc)
    row = {
        "id": bytes.fromhex(revision_hex),
        "directory": DIRECTORY_ID,
        "date": commit_date,
        "committer_date": commit_date,
        "type": "tar",
        "message": b"intel: Deposit 268 in collection intel",
        "metadata": revision_metadata,
    }

    swhid = ";".join(
        [
            "swh:1:dir:527c8e4a67d391f2bf1bbc86dd94af5d5cfc8ef7",
            f"origin={origin_url}",
            "visit=swh:1:snp:49d60943d9c061da1aba6266a811412f9db8de2e",
            f"anchor={revision_swhid}",
            "path=/",
        ]
    )

    # Fields identical in every deposit-request row of deposit 268.
    shared_deposit_fields = {
        "deposit.id": 268,
        "deposit.external_id": "f80482de-90a8-4c32-bce4-6f6918d492ff",
        "deposit.swhid_context": swhid,
        "deposit.status": "success",
        "deposit_client.provider_url": "https://software.intel.com",
        "deposit_collection.name": "intel",
        "auth_user.username": "intel",
    }
    latest_metadata_date = datetime.datetime(
        2019, 5, 14, 7, 49, 36, 775072, tzinfo=utc
    )
    earliest_metadata_date = datetime.datetime(
        2019, 5, 14, 7, 28, 33, 210100, tzinfo=utc
    )
    # (metadata, date) of each deposit request, in the order the cursor
    # yields them.
    deposit_requests = [
        (extrinsic_metadata, latest_metadata_date),
        (None, datetime.datetime(2019, 5, 14, 7, 49, 36, 477061, tzinfo=utc)),
        (extrinsic_metadata, earliest_metadata_date),
        (None, datetime.datetime(2019, 5, 14, 7, 28, 33, 41454, tzinfo=utc)),
    ]
    deposit_rows = [
        {
            **shared_deposit_fields,
            "deposit_request.metadata": request_metadata,
            "deposit_request.date": request_date,
        }
        for (request_metadata, request_date) in deposit_requests
    ]

    def fake_origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(deposit_rows)

    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    def expected_add(discovery_date, authority, metadata_format, payload):
        """Expected ``raw_extrinsic_metadata_add`` call for one object."""
        return call.raw_extrinsic_metadata_add(
            [
                RawExtrinsicMetadata(
                    target=DIRECTORY_SWHID,
                    discovery_date=discovery_date,
                    authority=authority,
                    fetcher=FETCHER,
                    format=metadata_format,
                    metadata=json.dumps(payload).encode(),
                    origin=origin_url,
                    revision=CoreSWHID.from_string(revision_swhid),
                )
            ]
        )

    expected_calls = [
        call.origin_get([origin_url]),
        expected_add(
            latest_metadata_date,
            INTEL_AUTHORITY,
            "sword-v2-atom-codemeta-v2-in-json",
            extrinsic_metadata,
        ),
        expected_add(
            earliest_metadata_date,
            INTEL_AUTHORITY,
            "sword-v2-atom-codemeta-v2-in-json",
            extrinsic_metadata,
        ),
        # The artifacts are expected with the date of the latest deposit
        # request that carries metadata.
        expected_add(
            latest_metadata_date,
            SWH_AUTHORITY,
            "original-artifacts-json",
            dest_original_artifacts,
        ),
    ]
    assert storage.method_calls == expected_calls


def test_deposit_3_and_no_swhid():
    """``handle_row`` must leave the storage untouched for this HAL revision.

    The row's message is "hal: Deposit 342 in collection hal"; its metadata
    merges the Atom/CodeMeta document with an ``original_artifact`` list, and
    ``None`` is passed in place of a deposit cursor.
    """
    utc = datetime.timezone.utc

    deposit_metadata = {
        "id": "hal-02337300",
        "@xmlns": "http://www.w3.org/2005/Atom",
        "author": {"name": "HAL", "email": "hal@ccsd.cnrs.fr"},
        "client": "hal",
        "codemeta:url": "https://hal.archives-ouvertes.fr/hal-02337300",
        "codemeta:name": "R package SMM, Simulation and Estimation of Multi-State Discrete-Time Semi-Markov and Markov Models",
        "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
        "codemeta:author": [
            # ...
        ],
        # ...
    }
    artifacts = [
        # ...
    ]

    revision_row = dict(
        id=b"\x91\xe5\xca\x8b'K\xf1\xa8cFd2\xd7Q\xf7A\xbc\x94\xba&",
        directory=DIRECTORY_ID,
        date=datetime.datetime(2017, 1, 1, 0, 0, tzinfo=utc),
        committer_date=datetime.datetime(2019, 11, 6, 14, 47, 30, tzinfo=utc),
        type="tar",
        message=b"hal: Deposit 342 in collection hal",
        # The artifact list comes after all the deposit metadata keys.
        metadata=dict(deposit_metadata, original_artifact=artifacts),
    )

    storage_mock = Mock()
    handle_row(copy.deepcopy(revision_row), storage_mock, None, dry_run=False)

    # Not a single storage method may have been invoked.
    assert storage_mock.method_calls == []


def test_deposit_3_and_unknown_deposit():
    """``handle_row`` must not record any storage call for this SWH deposit row.

    The revision message is "swh: Deposit 159 in collection swh" and ``None``
    is passed as the deposit cursor; the test only checks that the storage
    mock received no method call at all.
    """
    extrinsic_metadata = {
        "title": "Je suis GPL",
        "@xmlns": "http://www.w3.org/2005/Atom",
        "client": "swh",
        "codemeta:url": "https://forge.softwareheritage.org/source/jesuisgpl/",
        "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
        "codemeta:author": {
            "codemeta:name": "Stefano Zacchiroli",
            "codemeta:jobTitle": "Maintainer",
        },
        # ...
    }

    row = {
        "id": b"\x8e\x9c\xee\x14\xa6\xad9\xbc\xa44pw\xb8\x7f\xb5\xbb\xd8\x95;\xb1",
        "directory": DIRECTORY_ID,
        "date": datetime.datetime(
            2018, 7, 23, 12, 25, 45, 907132, tzinfo=datetime.timezone.utc
        ),
        "committer_date": datetime.datetime(
            2018, 7, 23, 12, 25, 45, 907132, tzinfo=datetime.timezone.utc
        ),
        "type": "tar",
        "message": b"swh: Deposit 159 in collection swh",
        "metadata": extrinsic_metadata,
    }

    storage = Mock()

    deposit_cur = None
    # handle_row receives a deep copy so the fixture above is never mutated.
    handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False)

    # Nothing may have been read from or written to the storage.
    assert storage.method_calls == []


def test_deposit_4_without_xmlns():
    """Deposit metadata keyed by expanded ``{namespace}name`` tags (no ``@xmlns``).

    Expects ``handle_row`` to query the deposit cursor once, look up the
    origin, and add exactly one metadata object in the
    ``...-with-expanded-namespaces`` format, dated like the deposit request
    row that carries the metadata.
    """
    utc = datetime.timezone.utc

    expanded_metadata = {
        "{http://www.w3.org/2005/Atom}id": "hal-01243573",
        "{http://www.w3.org/2005/Atom}author": {
            "{http://www.w3.org/2005/Atom}name": "HAL",
            "{http://www.w3.org/2005/Atom}email": "hal@ccsd.cnrs.fr",
        },
        "{http://www.w3.org/2005/Atom}client": "hal",
        "{http://www.w3.org/2005/Atom}external_identifier": "hal-01243573",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}url": "https://hal-test.archives-ouvertes.fr/hal-01243573",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}name": "The assignment problem",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author": {
            "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}name": "Morane Gruenpeter"
        },
        # ...
    }

    # The revision's author date and committer date are the same instant.
    revision_date = datetime.datetime(2018, 1, 17, 12, 49, 30, 902891, tzinfo=utc)
    revision_row = {
        "id": b"\x03\x98\x7f\x05n\xafE\x96\xcd \xd7\xb2\xee\x01\xc9\xb8L\xed\xdf\xa8",
        "directory": DIRECTORY_ID,
        "date": revision_date,
        "committer_date": revision_date,
        "type": "tar",
        "message": b": Deposit 79 in collection hal",
        "metadata": expanded_metadata,
    }

    swhid_context = (
        "swh:1:dir:e04b2a7b8a8838da0693e9fd992a10d6fd211b50"
        ";origin=https://hal.archives-ouvertes.fr/hal-01243573"
        ";visit=swh:1:snp:c31851534c86676a040fb10f438728c90f1c9d55"
        ";anchor=swh:1:rev:43549ebbe70c9cdf0be1647e6319392eaa06f3a3"
        ";path=/"
    )

    def deposit_request(metadata, date):
        # Row dict for get_mock_deposit_cur; only metadata and date vary.
        return {
            "deposit.id": 79,
            "deposit.external_id": "hal-01243573",
            "deposit.swhid_context": swhid_context,
            "deposit.status": "success",
            "deposit_request.metadata": metadata,
            "deposit_request.date": date,
            "deposit_client.provider_url": "https://hal.archives-ouvertes.fr/",
            "deposit_collection.name": "hal",
            "auth_user.username": "hal",
        }

    metadata_request_date = datetime.datetime(
        2018, 1, 17, 12, 49, 30, 645576, tzinfo=utc
    )
    request_rows = [
        deposit_request(
            None, datetime.datetime(2018, 1, 17, 12, 49, 31, 208347, tzinfo=utc)
        ),
        deposit_request(expanded_metadata, metadata_request_date),
    ]

    origin_url = "https://hal.archives-ouvertes.fr/hal-01243573"

    def fake_origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(request_rows)

    handle_row(copy.deepcopy(revision_row), storage, deposit_cur, dry_run=False)

    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    # Built only after handle_row ran: the cursor rows alias expanded_metadata.
    expected_metadata = RawExtrinsicMetadata(
        target=DIRECTORY_SWHID,
        discovery_date=metadata_request_date,
        authority=HAL_AUTHORITY,
        fetcher=FETCHER,
        format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces",
        metadata=json.dumps(expanded_metadata).encode(),
        origin=origin_url,
        revision=CoreSWHID.from_string(
            "swh:1:rev:03987f056eaf4596cd20d7b2ee01c9b84ceddfa8"
        ),
    )
    assert storage.method_calls == [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add([expected_metadata]),
        # note: no original artifacts
    ]


def test_deposit_4_wrong_origin():
    """Expanded-namespace deposit whose origin is on ``inria.halpreprod``.

    The expected origin URL equals the ``origin=`` qualifier of the deposit's
    SWHID context, while the deposit client's provider URL is
    ``https://hal.archives-ouvertes.fr/``. ``handle_row`` is expected to query
    the cursor once, look up that origin, and add a single metadata object.
    """
    utc = datetime.timezone.utc

    expanded_metadata = {
        "{http://www.w3.org/2005/Atom}id": "hal-01588781",
        "{http://www.w3.org/2005/Atom}author": {
            "{http://www.w3.org/2005/Atom}name": "HAL",
            "{http://www.w3.org/2005/Atom}email": "hal@ccsd.cnrs.fr",
        },
        "{http://www.w3.org/2005/Atom}client": "hal",
        "{http://www.w3.org/2005/Atom}external_identifier": "hal-01588781",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}url": "https://inria.halpreprod.archives-ouvertes.fr/hal-01588781",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}name": "The assignment problem ",
        "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author": {
            "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}name": "Morane Gruenpeter",
            "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}affiliation": "Initiative pour la Recherche et l'Innovation sur le Logiciel Libre",
        },
        # ...
    }

    # The revision's author date and committer date are the same instant.
    revision_date = datetime.datetime(2018, 1, 10, 13, 14, 51, 77033, tzinfo=utc)
    revision_row = {
        "id": b"-{\xcec\x1f\xc7\x91\x08\x03\x11\xeb\x83\\GB\x8eXjn\xa4",
        "directory": DIRECTORY_ID,
        "date": revision_date,
        "committer_date": revision_date,
        "type": "tar",
        "message": b": Deposit 75 in collection hal",
        "metadata": expanded_metadata,
    }

    swhid_context = (
        "swh:1:dir:d8971c651fe256942aa4499a3ccdbaa305d3bade"
        ";origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01588781"
        ";visit=swh:1:snp:7c70cc8ea5b79e376605fd6e9b3b04d98861ffc0"
        ";anchor=swh:1:rev:2d7bce631fc791080311eb835c47428e586a6ea4"
        ";path=/"
    )

    def deposit_request(metadata, date):
        # Row dict for get_mock_deposit_cur; only metadata and date vary.
        return {
            "deposit.id": 75,
            "deposit.external_id": "hal-01588781",
            "deposit.swhid_context": swhid_context,
            "deposit.status": "success",
            "deposit_request.metadata": metadata,
            "deposit_request.date": date,
            "deposit_client.provider_url": "https://hal.archives-ouvertes.fr/",
            "deposit_collection.name": "hal",
            "auth_user.username": "hal",
        }

    metadata_request_date = datetime.datetime(
        2018, 1, 10, 13, 14, 50, 555143, tzinfo=utc
    )
    request_rows = [
        deposit_request(
            None, datetime.datetime(2018, 1, 10, 13, 14, 51, 523963, tzinfo=utc)
        ),
        deposit_request(expanded_metadata, metadata_request_date),
    ]

    origin_url = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588781"

    def fake_origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(request_rows)

    handle_row(copy.deepcopy(revision_row), storage, deposit_cur, dry_run=False)

    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    # Built only after handle_row ran: the cursor rows alias expanded_metadata.
    expected_metadata = RawExtrinsicMetadata(
        target=DIRECTORY_SWHID,
        discovery_date=metadata_request_date,
        authority=HAL_AUTHORITY,
        fetcher=FETCHER,
        format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces",
        metadata=json.dumps(expanded_metadata).encode(),
        origin=origin_url,
        revision=CoreSWHID.from_string(
            "swh:1:rev:2d7bce631fc791080311eb835c47428e586a6ea4"
        ),
    )
    assert storage.method_calls == [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add([expected_metadata]),
        # note: no original artifacts
    ]


def test_deposit_missing_metadata_in_revision():
    """Revision row holding only ``original_artifact``, no Atom/CodeMeta fields.

    The Atom/CodeMeta document is present only on one of the deposit request
    rows. ``handle_row`` is expected to look up the origin, then add two
    metadata objects: the deposit metadata (HAL authority) and the
    re-shaped original artifacts (SWH authority).
    """
    utc = datetime.timezone.utc

    deposit_metadata = {
        "id": "hal-01243573",
        "@xmlns": "http://www.w3.org/2005/Atom",
        "author": {"name": "HAL", "email": "hal@ccsd.cnrs.fr"},
        "client": "hal",
        "committer": "Administrateur Du Ccsd",
        "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243573",
        "codemeta:name": "The assignment problem",
        "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
        "codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
        "codemeta:version": "1",
        "codemeta:identifier": {"#text": "10.5281/zenodo.438684", "@name": "doi",},
        "external_identifier": "hal-01243573",
        "codemeta:dateCreated": "2017-11-16T14:54:23+01:00",
    }

    # Artifact description as stored on the revision row (flat checksums).
    artifacts_in_revision = [
        {
            "name": "archive.zip",
            "sha1": "e8e46324970cd5af7f98c5a86f33f47fa4a41b4a",
            "length": 118650,
            "sha256": "fec81b63d666c43524f966bbd3263da5bee55051d2b48c1659cca5f56fd953e5",
            "sha1_git": "9da2bbd08bec590b36ede2ed43d74cd510b10a79",
            "blake2s256": "5d0973ba3644cc2bcfdb41ff1891744337d6aa9547a7e59fe466f684b027f295",
            "archive_type": "zip",
        }
    ]
    # Artifact description expected in the storage (nested "checksums").
    artifacts_in_storage = [
        {
            "length": 118650,
            "archive_type": "zip",
            "filename": "archive.zip",
            "checksums": {
                "sha1": "e8e46324970cd5af7f98c5a86f33f47fa4a41b4a",
                "sha256": "fec81b63d666c43524f966bbd3263da5bee55051d2b48c1659cca5f56fd953e5",
                "sha1_git": "9da2bbd08bec590b36ede2ed43d74cd510b10a79",
                "blake2s256": "5d0973ba3644cc2bcfdb41ff1891744337d6aa9547a7e59fe466f684b027f295",
            },
        }
    ]

    # The revision's author date and committer date are the same instant.
    revision_date = datetime.datetime(2019, 2, 25, 15, 49, 16, 594536, tzinfo=utc)
    revision_row = {
        "id": b"\x03@v\xf3\xf4\x1e\xe1 N\xb9\xf6@\x82\xcb\xe6\xe9P\xd7\xbb\x8a",
        "directory": DIRECTORY_ID,
        "date": revision_date,
        "committer_date": revision_date,
        "type": "tar",
        "message": b"hal: Deposit 229 in collection hal",
        "metadata": {"original_artifact": artifacts_in_revision},
    }

    swhid_context = (
        "swh:1:dir:3d65b6f065118cb856272829b459f0dfa55549aa"
        ";origin=https://hal-test.archives-ouvertes.fr/hal-01243573"
        ";visit=swh:1:snp:322c54ff4023d3216a994bc9ff9ee524ed80ee1f"
        ";anchor=swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a"
        ";path=/"
    )

    def deposit_request(metadata, date):
        # Row dict for get_mock_deposit_cur; only metadata and date vary.
        return {
            "deposit.id": 229,
            "deposit.external_id": "hal-01243573",
            "deposit.swhid_context": swhid_context,
            "deposit.status": "success",
            "deposit_request.metadata": metadata,
            "deposit_request.date": date,
            "deposit_client.provider_url": "https://hal.archives-ouvertes.fr/",
            "deposit_collection.name": "hal",
            "auth_user.username": "hal",
        }

    empty_request_date = datetime.datetime(
        2019, 2, 25, 15, 54, 30, 102072, tzinfo=utc
    )
    metadata_request_date = datetime.datetime(
        2019, 2, 25, 15, 49, 12, 302745, tzinfo=utc
    )
    request_rows = [
        deposit_request(None, empty_request_date),
        deposit_request(deposit_metadata, metadata_request_date),
    ]

    origin_url = "https://hal.archives-ouvertes.fr/hal-01243573"
    # /!\ not https://hal-test.archives-ouvertes.fr/hal-01243573
    #     do not trust the metadata!

    def fake_origin_get(urls):
        assert urls == [origin_url]
        return [Origin(url=origin_url)]

    storage = Mock()
    storage.origin_get.side_effect = fake_origin_get
    deposit_cur = get_mock_deposit_cur(request_rows)

    handle_row(copy.deepcopy(revision_row), storage, deposit_cur, dry_run=False)

    deposit_cur.execute.assert_called_once()
    deposit_cur.__iter__.assert_called_once()

    # Fields shared by both expected metadata objects.
    shared_fields = dict(
        target=DIRECTORY_SWHID,
        fetcher=FETCHER,
        origin=origin_url,
        revision=CoreSWHID.from_string(
            "swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a"
        ),
    )
    # Built only after handle_row ran: the cursor rows alias deposit_metadata.
    expected_deposit_metadata = RawExtrinsicMetadata(
        discovery_date=metadata_request_date,
        authority=HAL_AUTHORITY,
        format="sword-v2-atom-codemeta-v2-in-json",
        metadata=json.dumps(deposit_metadata).encode(),
        **shared_fields,
    )
    expected_artifacts = RawExtrinsicMetadata(
        discovery_date=empty_request_date,
        authority=SWH_AUTHORITY,
        format="original-artifacts-json",
        metadata=json.dumps(artifacts_in_storage).encode(),
        **shared_fields,
    )

    assert storage.method_calls == [
        call.origin_get([origin_url]),
        call.raw_extrinsic_metadata_add([expected_deposit_metadata]),
        call.raw_extrinsic_metadata_add([expected_artifacts]),
    ]
