Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/deposit/tests/test_deposit.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import re | import re | ||||
from typing import List | |||||
import attr | import attr | ||||
import pytest | import pytest | ||||
from swh.core.pytest_plugin import requests_mock_datadir_factory | from swh.core.pytest_plugin import requests_mock_datadir_factory | ||||
from swh.loader.package.deposit.loader import DepositLoader | from swh.loader.package.deposit.loader import DepositLoader | ||||
from swh.loader.package.loader import now | |||||
from swh.loader.package.tests.common import check_metadata_paths | from swh.loader.package.tests.common import check_metadata_paths | ||||
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | ||||
from swh.model.hashutil import hash_to_bytes, hash_to_hex | from swh.model.hashutil import hash_to_bytes, hash_to_hex | ||||
from swh.model.identifiers import SWHID | from swh.model.identifiers import SWHID | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
MetadataFetcher, | MetadataFetcher, | ||||
▲ Show 20 Lines • Show All 172 Lines • ▼ Show 20 Lines | authority = MetadataAuthority( | ||||
url="https://hal-test.archives-ouvertes.fr/", | url="https://hal-test.archives-ouvertes.fr/", | ||||
) | ) | ||||
# Check origin metadata | # Check origin metadata | ||||
orig_meta = loader.storage.raw_extrinsic_metadata_get( | orig_meta = loader.storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.ORIGIN, url, authority | MetadataTargetType.ORIGIN, url, authority | ||||
) | ) | ||||
assert orig_meta.next_page_token is None | assert orig_meta.next_page_token is None | ||||
assert len(orig_meta.results) == 1 | raw_meta = loader.client.metadata_get(deposit_id) | ||||
all_raw_metadata: List[str] = raw_meta["raw_metadata"] | |||||
# 2 raw metadata xml + 1 json dict | |||||
assert len(orig_meta.results) == len(all_raw_metadata) + 1 | |||||
orig_meta0 = orig_meta.results[0] | orig_meta0 = orig_meta.results[0] | ||||
assert orig_meta0.authority == authority | assert orig_meta0.authority == authority | ||||
assert orig_meta0.fetcher == fetcher | assert orig_meta0.fetcher == fetcher | ||||
# Check revision metadata | # Check revision metadata | ||||
revision_swhid = SWHID(object_type="revision", object_id=revision_id) | revision_swhid = SWHID(object_type="revision", object_id=revision_id) | ||||
rev_meta = loader.storage.raw_extrinsic_metadata_get( | actual_rev_meta = loader.storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.REVISION, revision_swhid, authority | MetadataTargetType.REVISION, revision_swhid, authority | ||||
) | ) | ||||
assert rev_meta.next_page_token is None | assert actual_rev_meta.next_page_token is None | ||||
assert len(rev_meta.results) == 1 | assert len(actual_rev_meta.results) == len(all_raw_metadata) | ||||
rev_meta0 = rev_meta.results[0] | for rev_meta in actual_rev_meta.results: | ||||
assert rev_meta0.authority == authority | assert rev_meta.authority == authority | ||||
assert rev_meta0.fetcher == fetcher | assert rev_meta.fetcher == fetcher | ||||
assert rev_meta.metadata.decode() in all_raw_metadata | |||||
# Retrieve the information for deposit status update query to the deposit | # Retrieve the information for deposit status update query to the deposit | ||||
urls = [ | urls = [ | ||||
m | m | ||||
for m in requests_mock_datadir.request_history | for m in requests_mock_datadir.request_history | ||||
if m.url == f"{DEPOSIT_URL}/{deposit_id}/update/" | if m.url == f"{DEPOSIT_URL}/{deposit_id}/update/" | ||||
] | ] | ||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | def test_deposit_loading_ok_2(swh_config, requests_mock_datadir): | ||||
# Retrieve the revision | # Retrieve the revision | ||||
revision = loader.storage.revision_get([hash_to_bytes(revision_id)])[0] | revision = loader.storage.revision_get([hash_to_bytes(revision_id)])[0] | ||||
assert revision | assert revision | ||||
assert revision.date.to_dict() == raw_meta["deposit"]["author_date"] | assert revision.date.to_dict() == raw_meta["deposit"]["author_date"] | ||||
assert revision.committer_date.to_dict() == raw_meta["deposit"]["committer_date"] | assert revision.committer_date.to_dict() == raw_meta["deposit"]["committer_date"] | ||||
read_api = f"{DEPOSIT_URL}/{deposit_id}/meta/" | read_api = f"{DEPOSIT_URL}/{deposit_id}/meta/" | ||||
provider = { | |||||
"provider_name": "hal", | |||||
"provider_type": "deposit_client", | |||||
"provider_url": "https://hal-test.archives-ouvertes.fr/", | |||||
"metadata": None, | |||||
} | |||||
tool = { | |||||
"name": "swh-deposit", | |||||
"version": "0.0.1", | |||||
"configuration": {"sword_version": "2"}, | |||||
} | |||||
assert revision.metadata == { | assert revision.metadata == { | ||||
"extrinsic": { | "extrinsic": { | ||||
"provider": read_api, | "provider": read_api, | ||||
"raw": { | "raw": { | ||||
"origin": {"type": "deposit", "url": url,}, | "origin": {"type": "deposit", "url": url,}, | ||||
"origin_metadata": { | "origin_metadata": { | ||||
"metadata": { | "metadata": raw_meta["raw_metadata"], | ||||
vlorentz: isn't this the original XML? | |||||
ardumont (author, marked Done): yes, it is.
It predates the change you requested, which was to continue sending the JSON one.
I've… | |||||
ardumont (author, marked Done): adapted back to keep the old JSON format now. | |||||
"@xmlns": ["http://www.w3.org/2005/Atom"], | "provider": provider, | ||||
"author": ["some awesome author", "another one", "no one",], | "tool": tool, | ||||
ardumont (author, marked Done): cf. my question in the diff: do we still want to write metadata directly in a Revision model object? | |||||
"codemeta:dateCreated": "2017-10-07T15:17:08Z", | |||||
"codemeta:datePublished": "2017-10-08T15:00:00Z", | |||||
"external_identifier": "some-external-id", | |||||
"url": url, | |||||
}, | |||||
"provider": { | |||||
"metadata": None, | |||||
"provider_name": "hal", | |||||
"provider_type": "deposit_client", | |||||
"provider_url": "https://hal-test.archives-ouvertes.fr/", | |||||
}, | |||||
"tool": { | |||||
"configuration": {"sword_version": "2"}, | |||||
"name": "swh-deposit", | |||||
"version": "0.0.1", | |||||
}, | |||||
}, | }, | ||||
}, | }, | ||||
"when": revision.metadata["extrinsic"]["when"], # dynamic | "when": revision.metadata["extrinsic"]["when"], # dynamic | ||||
}, | }, | ||||
"original_artifact": [ | "original_artifact": [ | ||||
{ | { | ||||
"checksums": { | "checksums": { | ||||
"sha1": "f8c63d7c890a7453498e6cf9fef215d85ec6801d", | "sha1": "f8c63d7c890a7453498e6cf9fef215d85ec6801d", | ||||
Show All 9 Lines | def test_deposit_loading_ok_2(swh_config, requests_mock_datadir): | ||||
fetcher = MetadataFetcher(name="swh-deposit", version="0.0.1",) | fetcher = MetadataFetcher(name="swh-deposit", version="0.0.1",) | ||||
authority = MetadataAuthority( | authority = MetadataAuthority( | ||||
type=MetadataAuthorityType.DEPOSIT_CLIENT, | type=MetadataAuthorityType.DEPOSIT_CLIENT, | ||||
url="https://hal-test.archives-ouvertes.fr/", | url="https://hal-test.archives-ouvertes.fr/", | ||||
) | ) | ||||
# Check the origin metadata swh side | # Check the origin metadata swh side | ||||
orig_meta = loader.storage.raw_extrinsic_metadata_get( | origin_extrinsic_metadata = loader.storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.ORIGIN, url, authority | MetadataTargetType.ORIGIN, url, authority | ||||
) | ) | ||||
assert orig_meta.next_page_token is None | assert origin_extrinsic_metadata.next_page_token is None | ||||
assert len(orig_meta.results) == 1 | all_raw_metadata: List[str] = raw_meta["raw_metadata"] | ||||
# 1 raw metadata xml + 1 json dict | |||||
orig_meta0 = orig_meta.results[0] | assert len(origin_extrinsic_metadata.results) == len(all_raw_metadata) + 1 | ||||
expected_metadata = RawExtrinsicMetadata( | expected_metadata = [] | ||||
for idx, raw_meta in enumerate(all_raw_metadata): | |||||
origin_meta = origin_extrinsic_metadata.results[idx] | |||||
expected_metadata.append( | |||||
RawExtrinsicMetadata( | |||||
type=MetadataTargetType.ORIGIN, | type=MetadataTargetType.ORIGIN, | ||||
id=url, | id=url, | ||||
discovery_date=orig_meta0.discovery_date, | discovery_date=origin_meta.discovery_date, | ||||
metadata=json.dumps( | metadata=raw_meta.encode(), | ||||
{ | format="sword-v2-atom-codemeta-v2", | ||||
"@xmlns": ["http://www.w3.org/2005/Atom"], | authority=authority, | ||||
"author": ["some awesome author", "another one", "no one"], | fetcher=fetcher, | ||||
"codemeta:dateCreated": "2017-10-07T15:17:08Z", | ) | ||||
"codemeta:datePublished": "2017-10-08T15:00:00Z", | ) | ||||
"external_identifier": "some-external-id", | |||||
"url": "https://hal-test.archives-ouvertes.fr/some-external-id", | origin_metadata = { | ||||
"metadata": all_raw_metadata, | |||||
"provider": provider, | |||||
"tool": tool, | |||||
} | } | ||||
).encode(), | expected_metadata.append( | ||||
format="sword-v2-atom-codemeta-v2-in-json", | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.ORIGIN, | |||||
id=url, | |||||
discovery_date=origin_extrinsic_metadata.results[-1].discovery_date, | |||||
metadata=json.dumps(origin_metadata).encode(), | |||||
format="original-artifacts-json", | |||||
authority=authority, | authority=authority, | ||||
fetcher=fetcher, | fetcher=fetcher, | ||||
) | ) | ||||
) | |||||
assert orig_meta0 == expected_metadata | assert len(origin_extrinsic_metadata.results) == len(expected_metadata) | ||||
for orig_meta in origin_extrinsic_metadata.results: | |||||
assert orig_meta in expected_metadata | |||||
# Check the revision metadata swh side | # Check the revision metadata swh side | ||||
revision_swhid = SWHID(object_type="revision", object_id=revision_id) | revision_swhid = SWHID(object_type="revision", object_id=revision_id) | ||||
rev_meta = loader.storage.raw_extrinsic_metadata_get( | actual_revision_metadata = loader.storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.REVISION, revision_swhid, authority | MetadataTargetType.REVISION, revision_swhid, authority | ||||
) | ) | ||||
assert rev_meta.next_page_token is None | assert actual_revision_metadata.next_page_token is None | ||||
assert len(actual_revision_metadata.results) == len(all_raw_metadata) | |||||
assert len(rev_meta.results) == 1 | |||||
rev_meta0 = rev_meta.results[0] | rev_metadata_template = RawExtrinsicMetadata( | ||||
assert rev_meta0 == attr.evolve( | |||||
expected_metadata, | |||||
type=MetadataTargetType.REVISION, | type=MetadataTargetType.REVISION, | ||||
id=revision_swhid, | id=revision_swhid, | ||||
format="sword-v2-atom-codemeta-v2", | |||||
authority=authority, | |||||
fetcher=fetcher, | |||||
origin=url, | origin=url, | ||||
# to satisfy the constructor | |||||
discovery_date=now(), | |||||
metadata=b"", | |||||
) | |||||
expected_revision_metadata = [] | |||||
for idx, raw_meta in enumerate(all_raw_metadata): | |||||
rev_metadata = actual_revision_metadata.results[idx] | |||||
expected_revision_metadata.append( | |||||
attr.evolve( | |||||
rev_metadata_template, | |||||
discovery_date=rev_metadata.discovery_date, | |||||
metadata=raw_meta.encode(), | |||||
) | ) | ||||
) | |||||
assert actual_revision_metadata.results == expected_revision_metadata | |||||
# Retrieve the information for deposit status update query to the deposit | # Retrieve the information for deposit status update query to the deposit | ||||
urls = [ | urls = [ | ||||
m | m | ||||
for m in requests_mock_datadir.request_history | for m in requests_mock_datadir.request_history | ||||
if m.url == f"{DEPOSIT_URL}/{deposit_id}/update/" | if m.url == f"{DEPOSIT_URL}/{deposit_id}/update/" | ||||
] | ] | ||||
Show All 13 Lines |
isn't this the original XML?