Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/deposit/tests/test_deposit.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import re | import re | ||||
from typing import List | from typing import List | ||||
import attr | import attr | ||||
import pytest | import pytest | ||||
from swh.core.pytest_plugin import requests_mock_datadir_factory | from swh.core.pytest_plugin import requests_mock_datadir_factory | ||||
from swh.loader.package.deposit.loader import DepositLoader | from swh.loader.package.deposit.loader import ApiClient, DepositLoader | ||||
from swh.loader.package.loader import now | from swh.loader.package.loader import now | ||||
from swh.loader.package.tests.common import check_metadata_paths | from swh.loader.package.tests.common import check_metadata_paths | ||||
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | ||||
from swh.model.hashutil import hash_to_bytes, hash_to_hex | from swh.model.hashutil import hash_to_bytes, hash_to_hex | ||||
from swh.model.identifiers import SWHID | from swh.model.identifiers import SWHID | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
Show All 13 Lines | def requests_mock_datadir(requests_mock_datadir): | ||||
"""Enhance default mock data to mock put requests as the loader does some | """Enhance default mock data to mock put requests as the loader does some | ||||
internal update queries there. | internal update queries there. | ||||
""" | """ | ||||
requests_mock_datadir.put(re.compile("https")) | requests_mock_datadir.put(re.compile("https")) | ||||
return requests_mock_datadir | return requests_mock_datadir | ||||
def test_deposit_init_ok(swh_config, swh_loader_config): | def test_deposit_init_ok(swh_storage, deposit_client, swh_loader_config): | ||||
url = "some-url" | url = "some-url" | ||||
deposit_id = 999 | deposit_id = 999 | ||||
loader = DepositLoader(url, deposit_id) # Something that does not exist | loader = DepositLoader( | ||||
swh_storage, url, deposit_id, deposit_client | |||||
) # Something that does not exist | |||||
assert loader.url == url | assert loader.url == url | ||||
assert loader.client is not None | assert loader.client is not None | ||||
assert loader.client.base_url == swh_loader_config["deposit"]["url"] | assert loader.client.base_url == swh_loader_config["deposit"]["url"] | ||||
def test_deposit_loading_unknown_deposit(swh_config, requests_mock_datadir): | def test_deposit_from_configfile(swh_config): | ||||
"""Ensure the deposit instantiation is ok | |||||
""" | |||||
loader = DepositLoader.from_configfile(url="some-url", deposit_id="666") | |||||
assert isinstance(loader.client, ApiClient) | |||||
def test_deposit_loading_unknown_deposit( | |||||
swh_storage, deposit_client, requests_mock_datadir | |||||
): | |||||
"""Loading an unknown deposit should fail | """Loading an unknown deposit should fail | ||||
no origin, no visit, no snapshot | no origin, no visit, no snapshot | ||||
""" | """ | ||||
# private api url form: 'https://deposit.s.o/1/private/hal/666/raw/' | # private api url form: 'https://deposit.s.o/1/private/hal/666/raw/' | ||||
url = "some-url" | url = "some-url" | ||||
unknown_deposit_id = 667 | unknown_deposit_id = 667 | ||||
loader = DepositLoader(url, unknown_deposit_id) # does not exist | loader = DepositLoader( | ||||
swh_storage, url, unknown_deposit_id, deposit_client | |||||
) # does not exist | |||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status == {"status": "failed"} | assert actual_load_status == {"status": "failed"} | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert { | assert { | ||||
"content": 0, | "content": 0, | ||||
"directory": 0, | "directory": 0, | ||||
"origin": 0, | "origin": 0, | ||||
"origin_visit": 0, | "origin_visit": 0, | ||||
"release": 0, | "release": 0, | ||||
"revision": 0, | "revision": 0, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 0, | "snapshot": 0, | ||||
} == stats | } == stats | ||||
requests_mock_datadir_missing_one = requests_mock_datadir_factory( | requests_mock_datadir_missing_one = requests_mock_datadir_factory( | ||||
ignore_urls=[f"{DEPOSIT_URL}/666/raw/",] | ignore_urls=[f"{DEPOSIT_URL}/666/raw/",] | ||||
) | ) | ||||
def test_deposit_loading_failure_to_retrieve_1_artifact( | def test_deposit_loading_failure_to_retrieve_1_artifact( | ||||
swh_config, requests_mock_datadir_missing_one | swh_storage, deposit_client, requests_mock_datadir_missing_one | ||||
): | ): | ||||
"""Deposit with missing artifact ends up with an uneventful/partial visit | """Deposit with missing artifact ends up with an uneventful/partial visit | ||||
""" | """ | ||||
# private api url form: 'https://deposit.s.o/1/private/hal/666/raw/' | # private api url form: 'https://deposit.s.o/1/private/hal/666/raw/' | ||||
url = "some-url-2" | url = "some-url-2" | ||||
deposit_id = 666 | deposit_id = 666 | ||||
loader = DepositLoader(url, deposit_id) | loader = DepositLoader(swh_storage, url, deposit_id, deposit_client) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status["status"] == "uneventful" | assert actual_load_status["status"] == "uneventful" | ||||
assert actual_load_status["snapshot_id"] is not None | assert actual_load_status["snapshot_id"] is not None | ||||
assert_last_visit_matches(loader.storage, url, status="partial", type="deposit") | assert_last_visit_matches(loader.storage, url, status="partial", type="deposit") | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert { | assert { | ||||
"content": 0, | "content": 0, | ||||
"directory": 0, | "directory": 0, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"release": 0, | "release": 0, | ||||
"revision": 0, | "revision": 0, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == stats | } == stats | ||||
def test_revision_metadata_structure(swh_config, requests_mock_datadir): | def test_deposit_revision_metadata_structure( | ||||
swh_storage, deposit_client, requests_mock_datadir | |||||
): | |||||
url = "https://hal-test.archives-ouvertes.fr/some-external-id" | url = "https://hal-test.archives-ouvertes.fr/some-external-id" | ||||
deposit_id = 666 | deposit_id = 666 | ||||
loader = DepositLoader(url, deposit_id) | loader = DepositLoader(swh_storage, url, deposit_id, deposit_client) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
assert actual_load_status["snapshot_id"] is not None | assert actual_load_status["snapshot_id"] is not None | ||||
expected_revision_id = hash_to_bytes("637318680351f5d78856d13264faebbd91efe9bb") | expected_revision_id = hash_to_bytes("637318680351f5d78856d13264faebbd91efe9bb") | ||||
revision = loader.storage.revision_get([expected_revision_id])[0] | revision = loader.storage.revision_get([expected_revision_id])[0] | ||||
assert revision is not None | assert revision is not None | ||||
Show All 12 Lines | ): | ||||
for original_artifact in revision.metadata["original_artifact"]: | for original_artifact in revision.metadata["original_artifact"]: | ||||
check_metadata_paths( | check_metadata_paths( | ||||
original_artifact, | original_artifact, | ||||
paths=[("filename", str), ("length", int), ("checksums", dict),], | paths=[("filename", str), ("length", int), ("checksums", dict),], | ||||
) | ) | ||||
def test_deposit_loading_ok(swh_config, requests_mock_datadir): | def test_deposit_loading_ok(swh_storage, deposit_client, requests_mock_datadir): | ||||
url = "https://hal-test.archives-ouvertes.fr/some-external-id" | url = "https://hal-test.archives-ouvertes.fr/some-external-id" | ||||
deposit_id = 666 | deposit_id = 666 | ||||
loader = DepositLoader(url, deposit_id) | loader = DepositLoader(swh_storage, url, deposit_id, deposit_client) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
expected_snapshot_id = "b2b327b33dc85818bd23c3ccda8b7e675a66ecbd" | expected_snapshot_id = "b2b327b33dc85818bd23c3ccda8b7e675a66ecbd" | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
} | } | ||||
▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines | expected_body = { | ||||
"directory_id": hash_to_hex(revision.directory), | "directory_id": hash_to_hex(revision.directory), | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
"origin_url": url, | "origin_url": url, | ||||
} | } | ||||
assert body == expected_body | assert body == expected_body | ||||
def test_deposit_loading_ok_2(swh_config, requests_mock_datadir): | def test_deposit_loading_ok_2(swh_storage, deposit_client, requests_mock_datadir): | ||||
"""Field dates should be set appropriately | """Field dates should be set appropriately | ||||
""" | """ | ||||
external_id = "some-external-id" | external_id = "some-external-id" | ||||
url = f"https://hal-test.archives-ouvertes.fr/{external_id}" | url = f"https://hal-test.archives-ouvertes.fr/{external_id}" | ||||
deposit_id = 777 | deposit_id = 777 | ||||
loader = DepositLoader(url, deposit_id) | loader = DepositLoader(swh_storage, url, deposit_id, deposit_client) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
expected_snapshot_id = "3e68440fdd7c81d283f8f3aebb6f0c8657864192" | expected_snapshot_id = "3e68440fdd7c81d283f8f3aebb6f0c8657864192" | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
} | } | ||||
▲ Show 20 Lines • Show All 171 Lines • Show Last 20 Lines |