Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/deposit/tests/test_deposit.py
Show All 9 Lines | |||||
import pytest | import pytest | ||||
from swh.core.pytest_plugin import requests_mock_datadir_factory | from swh.core.pytest_plugin import requests_mock_datadir_factory | ||||
from swh.loader.package.deposit.loader import ApiClient, DepositLoader | from swh.loader.package.deposit.loader import ApiClient, DepositLoader | ||||
from swh.loader.package.loader import now | from swh.loader.package.loader import now | ||||
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | ||||
from swh.model.hashutil import hash_to_bytes, hash_to_hex | from swh.model.hashutil import hash_to_bytes, hash_to_hex | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataAuthority, | |||||
MetadataAuthorityType, | |||||
MetadataFetcher, | |||||
Origin, | Origin, | ||||
Person, | Person, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
Revision, | Release, | ||||
RevisionType, | |||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | SnapshotBranch, | ||||
TargetType, | TargetType, | ||||
Timestamp, | Timestamp, | ||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
) | ) | ||||
from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher | |||||
from swh.model.model import ObjectType as ModelObjectType | |||||
from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType | from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType | ||||
DEPOSIT_URL = "https://deposit.softwareheritage.org/1/private" | DEPOSIT_URL = "https://deposit.softwareheritage.org/1/private" | ||||
@pytest.fixture | @pytest.fixture | ||||
def requests_mock_datadir(requests_mock_datadir): | def requests_mock_datadir(requests_mock_datadir): | ||||
"""Enhance default mock data to mock put requests as the loader does some | """Enhance default mock data to mock put requests as the loader does some | ||||
▲ Show 20 Lines • Show All 126 Lines • ▼ Show 20 Lines | |||||
def test_deposit_loading_ok(swh_storage, deposit_client, requests_mock_datadir): | def test_deposit_loading_ok(swh_storage, deposit_client, requests_mock_datadir): | ||||
url = "https://hal-test.archives-ouvertes.fr/some-external-id" | url = "https://hal-test.archives-ouvertes.fr/some-external-id" | ||||
deposit_id = 666 | deposit_id = 666 | ||||
loader = DepositLoader( | loader = DepositLoader( | ||||
swh_storage, url, deposit_id, deposit_client, default_filename="archive.zip" | swh_storage, url, deposit_id, deposit_client, default_filename="archive.zip" | ||||
) | ) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
expected_snapshot_id = "b2b327b33dc85818bd23c3ccda8b7e675a66ecbd" | expected_snapshot_id = "1090aaadc9fd1a77798bf6187d309145cbd23c53" | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
} | } | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
url, | url, | ||||
status="full", | status="full", | ||||
type="deposit", | type="deposit", | ||||
snapshot=hash_to_bytes(expected_snapshot_id), | snapshot=hash_to_bytes(expected_snapshot_id), | ||||
) | ) | ||||
revision_id_hex = "637318680351f5d78856d13264faebbd91efe9bb" | release_id_hex = "77c127bff4f9137baf26774fe19e29d82a41f69d" | ||||
revision_id = hash_to_bytes(revision_id_hex) | release_id = hash_to_bytes(release_id_hex) | ||||
expected_snapshot = Snapshot( | expected_snapshot = Snapshot( | ||||
id=hash_to_bytes(expected_snapshot_id), | id=hash_to_bytes(expected_snapshot_id), | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch(target=release_id, target_type=TargetType.RELEASE,), | ||||
target=revision_id, target_type=TargetType.REVISION, | |||||
), | |||||
}, | }, | ||||
) | ) | ||||
check_snapshot(expected_snapshot, storage=loader.storage) | check_snapshot(expected_snapshot, storage=loader.storage) | ||||
revision = loader.storage.revision_get([revision_id])[0] | release = loader.storage.release_get([release_id])[0] | ||||
date = TimestampWithTimezone( | date = TimestampWithTimezone( | ||||
timestamp=Timestamp(seconds=1507389428, microseconds=0), | timestamp=Timestamp(seconds=1507389428, microseconds=0), | ||||
offset=0, | offset=0, | ||||
negative_utc=False, | negative_utc=False, | ||||
) | ) | ||||
person = Person( | person = Person( | ||||
fullname=b"Software Heritage", | fullname=b"Software Heritage", | ||||
name=b"Software Heritage", | name=b"Software Heritage", | ||||
email=b"robot@softwareheritage.org", | email=b"robot@softwareheritage.org", | ||||
) | ) | ||||
assert revision == Revision( | assert release == Release( | ||||
id=revision_id, | id=release_id, | ||||
name=b"HEAD", | |||||
message=b"hal: Deposit 666 in collection hal", | message=b"hal: Deposit 666 in collection hal", | ||||
author=person, | author=person, | ||||
committer=person, | |||||
date=date, | date=date, | ||||
committer_date=date, | target_type=ModelObjectType.DIRECTORY, | ||||
type=RevisionType.TAR, | target=b"\xfd-\xf1-\xc5SL\x1d\xa1\xe9\x18\x0b\x91Q\x02\xfbo`\x1d\x19", | ||||
directory=b"\xfd-\xf1-\xc5SL\x1d\xa1\xe9\x18\x0b\x91Q\x02\xfbo`\x1d\x19", | |||||
synthetic=True, | synthetic=True, | ||||
metadata=None, | metadata=None, | ||||
parents=(), | |||||
extra_headers=(), | |||||
) | ) | ||||
# check metadata | # check metadata | ||||
fetcher = MetadataFetcher(name="swh-deposit", version="0.0.1",) | fetcher = MetadataFetcher(name="swh-deposit", version="0.0.1",) | ||||
authority = MetadataAuthority( | authority = MetadataAuthority( | ||||
type=MetadataAuthorityType.DEPOSIT_CLIENT, | type=MetadataAuthorityType.DEPOSIT_CLIENT, | ||||
Show All 9 Lines | def test_deposit_loading_ok(swh_storage, deposit_client, requests_mock_datadir): | ||||
all_metadata_raw: List[str] = raw_meta["metadata_raw"] | all_metadata_raw: List[str] = raw_meta["metadata_raw"] | ||||
# 2 raw metadata xml + 1 json dict | # 2 raw metadata xml + 1 json dict | ||||
assert len(orig_meta.results) == len(all_metadata_raw) + 1 | assert len(orig_meta.results) == len(all_metadata_raw) + 1 | ||||
orig_meta0 = orig_meta.results[0] | orig_meta0 = orig_meta.results[0] | ||||
assert orig_meta0.authority == authority | assert orig_meta0.authority == authority | ||||
assert orig_meta0.fetcher == fetcher | assert orig_meta0.fetcher == fetcher | ||||
# Check directory metadata | # Check directory metadata | ||||
assert release.target_type == ModelObjectType.DIRECTORY | |||||
directory_swhid = CoreSWHID( | directory_swhid = CoreSWHID( | ||||
object_type=ObjectType.DIRECTORY, object_id=revision.directory | object_type=ObjectType.DIRECTORY, object_id=release.target | ||||
) | ) | ||||
actual_dir_meta = loader.storage.raw_extrinsic_metadata_get( | actual_dir_meta = loader.storage.raw_extrinsic_metadata_get( | ||||
directory_swhid, authority | directory_swhid, authority | ||||
) | ) | ||||
assert actual_dir_meta.next_page_token is None | assert actual_dir_meta.next_page_token is None | ||||
assert len(actual_dir_meta.results) == len(all_metadata_raw) | assert len(actual_dir_meta.results) == len(all_metadata_raw) | ||||
for dir_meta in actual_dir_meta.results: | for dir_meta in actual_dir_meta.results: | ||||
assert dir_meta.authority == authority | assert dir_meta.authority == authority | ||||
assert dir_meta.fetcher == fetcher | assert dir_meta.fetcher == fetcher | ||||
assert dir_meta.metadata.decode() in all_metadata_raw | assert dir_meta.metadata.decode() in all_metadata_raw | ||||
# Retrieve the information for deposit status update query to the deposit | # Retrieve the information for deposit status update query to the deposit | ||||
urls = [ | urls = [ | ||||
m | m | ||||
for m in requests_mock_datadir.request_history | for m in requests_mock_datadir.request_history | ||||
if m.url == f"{DEPOSIT_URL}/{deposit_id}/update/" | if m.url == f"{DEPOSIT_URL}/{deposit_id}/update/" | ||||
] | ] | ||||
assert len(urls) == 1 | assert len(urls) == 1 | ||||
update_query = urls[0] | update_query = urls[0] | ||||
body = update_query.json() | body = update_query.json() | ||||
expected_body = { | expected_body = { | ||||
"status": "done", | "status": "done", | ||||
"revision_id": revision_id_hex, | "release_id": release_id_hex, | ||||
"directory_id": hash_to_hex(revision.directory), | "directory_id": hash_to_hex(release.target), | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
"origin_url": url, | "origin_url": url, | ||||
} | } | ||||
assert body == expected_body | assert body == expected_body | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert { | assert { | ||||
"content": 303, | "content": 303, | ||||
"directory": 12, | "directory": 12, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"release": 0, | "release": 1, | ||||
"revision": 1, | "revision": 0, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == stats | } == stats | ||||
def test_deposit_loading_ok_2(swh_storage, deposit_client, requests_mock_datadir): | def test_deposit_loading_ok_2(swh_storage, deposit_client, requests_mock_datadir): | ||||
"""Field dates should be set appropriately | """Field dates should be set appropriately | ||||
""" | """ | ||||
external_id = "some-external-id" | external_id = "some-external-id" | ||||
url = f"https://hal-test.archives-ouvertes.fr/{external_id}" | url = f"https://hal-test.archives-ouvertes.fr/{external_id}" | ||||
deposit_id = 777 | deposit_id = 777 | ||||
loader = DepositLoader( | loader = DepositLoader( | ||||
swh_storage, url, deposit_id, deposit_client, default_filename="archive.zip" | swh_storage, url, deposit_id, deposit_client, default_filename="archive.zip" | ||||
) | ) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
expected_snapshot_id = "3e68440fdd7c81d283f8f3aebb6f0c8657864192" | expected_snapshot_id = "f87b25c121d9ab3ff0219b04b92d83f8c6f368f4" | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
} | } | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
url, | url, | ||||
status="full", | status="full", | ||||
type="deposit", | type="deposit", | ||||
snapshot=hash_to_bytes(expected_snapshot_id), | snapshot=hash_to_bytes(expected_snapshot_id), | ||||
) | ) | ||||
revision_id = "564d18943d71be80d0d73b43a77cfb205bcde96c" | release_id = "c6891941d4033f4fb1dbf39b501c819ac618f957" | ||||
expected_snapshot = Snapshot( | expected_snapshot = Snapshot( | ||||
id=hash_to_bytes(expected_snapshot_id), | id=hash_to_bytes(expected_snapshot_id), | ||||
branches={ | branches={ | ||||
b"HEAD": SnapshotBranch( | b"HEAD": SnapshotBranch( | ||||
target=hash_to_bytes(revision_id), target_type=TargetType.REVISION | target=hash_to_bytes(release_id), target_type=TargetType.RELEASE | ||||
) | ) | ||||
}, | }, | ||||
) | ) | ||||
check_snapshot(expected_snapshot, storage=loader.storage) | check_snapshot(expected_snapshot, storage=loader.storage) | ||||
raw_meta = loader.client.metadata_get(deposit_id) | raw_meta = loader.client.metadata_get(deposit_id) | ||||
# Ensure the date fields are set appropriately in the revision | # Ensure the date fields are set appropriately in the release | ||||
# Retrieve the revision | # Retrieve the release | ||||
revision = loader.storage.revision_get([hash_to_bytes(revision_id)])[0] | release = loader.storage.release_get([hash_to_bytes(release_id)])[0] | ||||
assert revision | assert release | ||||
assert revision.date.to_dict() == raw_meta["deposit"]["author_date"] | assert release.date.to_dict() == raw_meta["deposit"]["author_date"] | ||||
assert revision.committer_date.to_dict() == raw_meta["deposit"]["committer_date"] | assert not release.metadata | ||||
assert not revision.metadata | |||||
provider = { | provider = { | ||||
"provider_name": "hal", | "provider_name": "hal", | ||||
"provider_type": "deposit_client", | "provider_type": "deposit_client", | ||||
"provider_url": "https://hal-test.archives-ouvertes.fr/", | "provider_url": "https://hal-test.archives-ouvertes.fr/", | ||||
"metadata": None, | "metadata": None, | ||||
} | } | ||||
tool = { | tool = { | ||||
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | expected_metadata.append( | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
authority=authority, | authority=authority, | ||||
fetcher=fetcher, | fetcher=fetcher, | ||||
) | ) | ||||
) | ) | ||||
assert sorted(origin_extrinsic_metadata.results) == sorted(expected_metadata) | assert sorted(origin_extrinsic_metadata.results) == sorted(expected_metadata) | ||||
# Check the revision metadata swh side | # Check the release metadata swh side | ||||
assert release.target_type == ModelObjectType.DIRECTORY | |||||
directory_swhid = ExtendedSWHID( | directory_swhid = ExtendedSWHID( | ||||
object_type=ExtendedObjectType.DIRECTORY, object_id=revision.directory | object_type=ExtendedObjectType.DIRECTORY, object_id=release.target | ||||
) | ) | ||||
actual_directory_metadata = loader.storage.raw_extrinsic_metadata_get( | actual_directory_metadata = loader.storage.raw_extrinsic_metadata_get( | ||||
directory_swhid, authority | directory_swhid, authority | ||||
) | ) | ||||
assert actual_directory_metadata.next_page_token is None | assert actual_directory_metadata.next_page_token is None | ||||
assert len(actual_directory_metadata.results) == len(all_metadata_raw) | assert len(actual_directory_metadata.results) == len(all_metadata_raw) | ||||
revision_swhid = CoreSWHID( | release_swhid = CoreSWHID( | ||||
object_type=ObjectType.REVISION, object_id=hash_to_bytes(revision_id) | object_type=ObjectType.RELEASE, object_id=hash_to_bytes(release_id) | ||||
) | ) | ||||
dir_metadata_template = RawExtrinsicMetadata( | dir_metadata_template = RawExtrinsicMetadata( | ||||
target=directory_swhid, | target=directory_swhid, | ||||
format="sword-v2-atom-codemeta-v2", | format="sword-v2-atom-codemeta-v2", | ||||
authority=authority, | authority=authority, | ||||
fetcher=fetcher, | fetcher=fetcher, | ||||
origin=url, | origin=url, | ||||
revision=revision_swhid, | release=release_swhid, | ||||
# to satisfy the constructor | # to satisfy the constructor | ||||
discovery_date=now(), | discovery_date=now(), | ||||
metadata=b"", | metadata=b"", | ||||
) | ) | ||||
expected_directory_metadata = [] | expected_directory_metadata = [] | ||||
for idx, raw_meta in enumerate(all_metadata_raw): | for idx, raw_meta in enumerate(all_metadata_raw): | ||||
dir_metadata = actual_directory_metadata.results[idx] | dir_metadata = actual_directory_metadata.results[idx] | ||||
Show All 23 Lines | def test_deposit_loading_ok_2(swh_storage, deposit_client, requests_mock_datadir): | ||||
] | ] | ||||
assert len(urls) == 1 | assert len(urls) == 1 | ||||
update_query = urls[0] | update_query = urls[0] | ||||
body = update_query.json() | body = update_query.json() | ||||
expected_body = { | expected_body = { | ||||
"status": "done", | "status": "done", | ||||
"revision_id": revision_id, | "release_id": release_id, | ||||
"directory_id": hash_to_hex(revision.directory), | "directory_id": hash_to_hex(release.target), | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
"origin_url": url, | "origin_url": url, | ||||
} | } | ||||
assert body == expected_body | assert body == expected_body | ||||
def test_deposit_loading_ok_3(swh_storage, deposit_client, requests_mock_datadir): | def test_deposit_loading_ok_3(swh_storage, deposit_client, requests_mock_datadir): | ||||
"""Deposit loading can happen on tarball artifacts as well | """Deposit loading can happen on tarball artifacts as well | ||||
The latest deposit changes introduce the internal change. | The latest deposit changes introduce the internal change. | ||||
""" | """ | ||||
external_id = "hal-123456" | external_id = "hal-123456" | ||||
url = f"https://hal-test.archives-ouvertes.fr/{external_id}" | url = f"https://hal-test.archives-ouvertes.fr/{external_id}" | ||||
deposit_id = 888 | deposit_id = 888 | ||||
loader = DepositLoader(swh_storage, url, deposit_id, deposit_client) | loader = DepositLoader(swh_storage, url, deposit_id, deposit_client) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
expected_snapshot_id = "0ac7b54c042a026389f2087dc16f1d5c644ed0e4" | expected_snapshot_id = "212228fe041c763471c14545cf11dbec8003d6b4" | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
} | } | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | loader.storage, | ||||
url, | url, | ||||
status="full", | status="full", | ||||
type="deposit", | type="deposit", | ||||
snapshot=hash_to_bytes(expected_snapshot_id), | snapshot=hash_to_bytes(expected_snapshot_id), | ||||
) | ) |