Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9342514
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
20 KB
Subscribers
None
View Options
diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py
index bd36c95..23cbd62 100644
--- a/swh/loader/package/deposit/tests/test_deposit.py
+++ b/swh/loader/package/deposit/tests/test_deposit.py
@@ -1,418 +1,408 @@
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
import re
import attr
import pytest
from swh.model.hashutil import hash_to_bytes, hash_to_hex
from swh.model.identifiers import SWHID
from swh.model.model import (
Snapshot,
SnapshotBranch,
TargetType,
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
MetadataTargetType,
RawExtrinsicMetadata,
)
from swh.loader.package.deposit.loader import DepositLoader
from swh.loader.package.tests.common import check_metadata_paths
from swh.loader.tests import (
assert_last_visit_matches,
check_snapshot,
get_stats,
)
from swh.core.pytest_plugin import requests_mock_datadir_factory
DEPOSIT_URL = "https://deposit.softwareheritage.org/1/private"
@pytest.fixture
def requests_mock_datadir(requests_mock_datadir):
    """Extend the default datadir-backed mock so PUT requests are mocked too.

    The loader performs some internal update queries with PUT; every URL
    matching the "https" pattern is registered for that verb.

    """
    https_pattern = re.compile("https")
    requests_mock_datadir.put(https_pattern)
    return requests_mock_datadir
def test_deposit_init_ok(swh_config, swh_loader_config):
    """Instantiating the loader keeps the url and sets up a deposit client.

    """
    origin_url = "some-url"
    missing_deposit_id = 999  # Something that does not exist

    loader = DepositLoader(origin_url, missing_deposit_id)

    assert loader.url == origin_url
    client = loader.client
    assert client is not None
    # The client must target the deposit url found in the loader configuration
    assert client.base_url == swh_loader_config["deposit"]["url"]
def test_deposit_loading_unknown_deposit(swh_config, requests_mock_datadir):
    """Loading an unknown deposit should fail

    no origin, no visit, no snapshot

    """
    # private api url form: 'https://deposit.s.o/1/private/hal/666/raw/'
    loader = DepositLoader("some-url", 667)  # deposit 667 does not exist

    load_result = loader.load()
    assert load_result == {"status": "failed"}

    # Nothing at all must have been written to the storage
    object_types = (
        "content",
        "directory",
        "origin",
        "origin_visit",
        "person",
        "release",
        "revision",
        "skipped_content",
        "snapshot",
    )
    expected_stats = dict.fromkeys(object_types, 0)
    assert expected_stats == get_stats(loader.storage)
# Mock fixture built by the swh.core factory with the raw-archive URL of
# deposit 666 listed in ignore_urls. NOTE(review): presumably requests to an
# ignored URL are left unmocked so the artifact looks missing -- confirm
# against requests_mock_datadir_factory.
requests_mock_datadir_missing_one = requests_mock_datadir_factory(
    ignore_urls=[f"{DEPOSIT_URL}/666/raw/",]
)
def test_deposit_loading_failure_to_retrieve_1_artifact(
    swh_config, requests_mock_datadir_missing_one
):
    """Deposit with missing artifact ends up with an uneventful/partial visit

    """
    # private api url form: 'https://deposit.s.o/1/private/hal/666/raw/'
    origin_url = "some-url-2"
    loader = DepositLoader(origin_url, 666)

    load_result = loader.load()
    assert load_result["status"] == "uneventful"
    assert load_result["snapshot_id"] is not None

    assert_last_visit_matches(
        loader.storage, origin_url, status="partial", type="deposit"
    )

    # Only the origin, its visit and a snapshot are recorded; nothing else
    expected_stats = dict.fromkeys(
        (
            "content",
            "directory",
            "person",
            "release",
            "revision",
            "skipped_content",
        ),
        0,
    )
    expected_stats.update({"origin": 1, "origin_visit": 1, "snapshot": 1})
    assert expected_stats == get_stats(loader.storage)
def test_revision_metadata_structure(swh_config, requests_mock_datadir):
    """Check the layout of the metadata stored on the loaded revision.

    """
    origin_url = "https://hal-test.archives-ouvertes.fr/some-external-id"
    loader = DepositLoader(origin_url, 666)

    load_result = loader.load()
    assert load_result["status"] == "eventful"
    assert load_result["snapshot_id"] is not None

    rev_id = hash_to_bytes("637318680351f5d78856d13264faebbd91efe9bb")
    fetched_revisions = list(loader.storage.revision_get([rev_id]))
    revision = fetched_revisions[0]
    assert revision is not None

    rev_metadata = revision["metadata"]
    expected_paths = [
        ("extrinsic.provider", str),
        ("extrinsic.when", str),
        ("extrinsic.raw", dict),
        ("original_artifact", list),
    ]
    check_metadata_paths(rev_metadata, paths=expected_paths)

    # Only 2 top-level keys now
    assert set(rev_metadata.keys()) == {"extrinsic", "original_artifact"}

    for artifact in rev_metadata["original_artifact"]:
        check_metadata_paths(
            artifact,
            paths=[("filename", str), ("length", int), ("checksums", dict)],
        )
def test_deposit_loading_ok(swh_config, requests_mock_datadir):
url = "https://hal-test.archives-ouvertes.fr/some-external-id"
deposit_id = 666
loader = DepositLoader(url, deposit_id)
actual_load_status = loader.load()
expected_snapshot_id = "b2b327b33dc85818bd23c3ccda8b7e675a66ecbd"
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id,
}
assert_last_visit_matches(loader.storage, url, status="full", type="deposit")
stats = get_stats(loader.storage)
assert {
"content": 303,
"directory": 12,
"origin": 1,
"origin_visit": 1,
"person": 1,
"release": 0,
"revision": 1,
"skipped_content": 0,
"snapshot": 1,
} == stats
revision_id_hex = "637318680351f5d78856d13264faebbd91efe9bb"
revision_id = hash_to_bytes(revision_id_hex)
expected_snapshot = Snapshot(
id=hash_to_bytes(expected_snapshot_id),
branches={
b"HEAD": SnapshotBranch(
target=revision_id, target_type=TargetType.REVISION,
),
},
)
check_snapshot(expected_snapshot, storage=loader.storage)
revision = next(loader.storage.revision_get([revision_id]))
assert revision
# check metadata
fetcher = MetadataFetcher(name="swh-deposit", version="0.0.1",)
authority = MetadataAuthority(
type=MetadataAuthorityType.DEPOSIT_CLIENT,
url="https://hal-test.archives-ouvertes.fr/",
)
# Check origin metadata
orig_meta = loader.storage.raw_extrinsic_metadata_get(
MetadataTargetType.ORIGIN, url, authority
)
- assert orig_meta is not None
- assert isinstance(orig_meta, dict)
- assert len(orig_meta["results"]) == 1
- assert orig_meta["next_page_token"] is None
- orig_meta0 = orig_meta["results"][0]
+ assert orig_meta.next_page_token is None
+ assert len(orig_meta.results) == 1
+ orig_meta0 = orig_meta.results[0]
assert orig_meta0.authority == authority
assert orig_meta0.fetcher == fetcher
# Check revision metadata
revision_swhid = SWHID(object_type="revision", object_id=revision_id)
rev_meta = loader.storage.raw_extrinsic_metadata_get(
MetadataTargetType.REVISION, revision_swhid, authority
)
- assert rev_meta is not None
- assert isinstance(rev_meta, dict)
- assert len(rev_meta["results"]) == 1
- assert rev_meta["next_page_token"] is None
- rev_meta0 = rev_meta["results"][0]
+ assert rev_meta.next_page_token is None
+ assert len(rev_meta.results) == 1
+ rev_meta0 = rev_meta.results[0]
assert rev_meta0.authority == authority
assert rev_meta0.fetcher == fetcher
# Retrieve the information for deposit status update query to the deposit
urls = [
m
for m in requests_mock_datadir.request_history
if m.url == f"{DEPOSIT_URL}/{deposit_id}/update/"
]
assert len(urls) == 1
update_query = urls[0]
body = update_query.json()
expected_body = {
"status": "done",
"revision_id": revision_id_hex,
"directory_id": hash_to_hex(revision["directory"]),
"snapshot_id": expected_snapshot_id,
"origin_url": url,
}
assert body == expected_body
def test_deposit_loading_ok_2(swh_config, requests_mock_datadir):
"""Field dates should be set appropriately
"""
external_id = "some-external-id"
url = f"https://hal-test.archives-ouvertes.fr/{external_id}"
deposit_id = 777
loader = DepositLoader(url, deposit_id)
actual_load_status = loader.load()
expected_snapshot_id = "3e68440fdd7c81d283f8f3aebb6f0c8657864192"
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id,
}
assert_last_visit_matches(loader.storage, url, status="full", type="deposit")
revision_id = "564d18943d71be80d0d73b43a77cfb205bcde96c"
expected_snapshot = Snapshot(
id=hash_to_bytes(expected_snapshot_id),
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes(revision_id), target_type=TargetType.REVISION
)
},
)
check_snapshot(expected_snapshot, storage=loader.storage)
raw_meta = loader.client.metadata_get(deposit_id)
# Ensure the date fields are set appropriately in the revision
# Retrieve the revision
revision = next(loader.storage.revision_get([hash_to_bytes(revision_id)]))
assert revision
assert revision["date"] == raw_meta["deposit"]["author_date"]
assert revision["committer_date"] == raw_meta["deposit"]["committer_date"]
read_api = f"{DEPOSIT_URL}/{deposit_id}/meta/"
assert revision["metadata"] == {
"extrinsic": {
"provider": read_api,
"raw": {
"origin": {"type": "deposit", "url": url,},
"origin_metadata": {
"metadata": {
"@xmlns": ["http://www.w3.org/2005/Atom"],
"author": ["some awesome author", "another one", "no one",],
"codemeta:dateCreated": "2017-10-07T15:17:08Z",
"codemeta:datePublished": "2017-10-08T15:00:00Z",
"external_identifier": "some-external-id",
"url": url,
},
"provider": {
"metadata": None,
"provider_name": "hal",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
},
"tool": {
"configuration": {"sword_version": "2"},
"name": "swh-deposit",
"version": "0.0.1",
},
},
},
"when": revision["metadata"]["extrinsic"]["when"], # dynamic
},
"original_artifact": [
{
"checksums": {
"sha1": "f8c63d7c890a7453498e6cf9fef215d85ec6801d",
"sha256": "474bf646aeeff6d945eb752b1a9f8a40f3d81a88909ee7bd2d08cc822aa361e6", # noqa
},
"filename": "archive.zip",
"length": 956830,
}
],
}
fetcher = MetadataFetcher(name="swh-deposit", version="0.0.1",)
authority = MetadataAuthority(
type=MetadataAuthorityType.DEPOSIT_CLIENT,
url="https://hal-test.archives-ouvertes.fr/",
)
# Check the origin metadata swh side
orig_meta = loader.storage.raw_extrinsic_metadata_get(
MetadataTargetType.ORIGIN, url, authority
)
- assert orig_meta is not None
- assert isinstance(orig_meta, dict)
- assert len(orig_meta["results"]) == 1
- assert orig_meta["next_page_token"] is None
+ assert orig_meta.next_page_token is None
+ assert len(orig_meta.results) == 1
- assert len(orig_meta["results"]) == 1
-
- orig_meta0 = orig_meta["results"][0]
+ orig_meta0 = orig_meta.results[0]
expected_metadata = RawExtrinsicMetadata(
type=MetadataTargetType.ORIGIN,
id=url,
discovery_date=orig_meta0.discovery_date,
metadata=json.dumps(
{
"@xmlns": ["http://www.w3.org/2005/Atom"],
"author": ["some awesome author", "another one", "no one"],
"codemeta:dateCreated": "2017-10-07T15:17:08Z",
"codemeta:datePublished": "2017-10-08T15:00:00Z",
"external_identifier": "some-external-id",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id",
}
).encode(),
format="sword-v2-atom-codemeta-v2-in-json",
authority=authority,
fetcher=fetcher,
)
assert orig_meta0 == expected_metadata
# Check the revision metadata swh side
revision_swhid = SWHID(object_type="revision", object_id=revision_id)
rev_meta = loader.storage.raw_extrinsic_metadata_get(
MetadataTargetType.REVISION, revision_swhid, authority
)
- assert rev_meta is not None
- assert isinstance(rev_meta, dict)
- assert len(rev_meta["results"]) == 1
- assert rev_meta["next_page_token"] is None
- assert len(rev_meta["results"]) == 1
+ assert rev_meta.next_page_token is None
+
+ assert len(rev_meta.results) == 1
- rev_meta0 = rev_meta["results"][0]
+ rev_meta0 = rev_meta.results[0]
assert rev_meta0 == attr.evolve(
expected_metadata,
type=MetadataTargetType.REVISION,
id=revision_swhid,
origin=url,
)
# Retrieve the information for deposit status update query to the deposit
urls = [
m
for m in requests_mock_datadir.request_history
if m.url == f"{DEPOSIT_URL}/{deposit_id}/update/"
]
assert len(urls) == 1
update_query = urls[0]
body = update_query.json()
expected_body = {
"status": "done",
"revision_id": revision_id,
"directory_id": hash_to_hex(revision["directory"]),
"snapshot_id": expected_snapshot_id,
"origin_url": url,
}
assert body == expected_body
diff --git a/swh/loader/package/tests/test_loader_metadata.py b/swh/loader/package/tests/test_loader_metadata.py
index 38c5936..7daf771 100644
--- a/swh/loader/package/tests/test_loader_metadata.py
+++ b/swh/loader/package/tests/test_loader_metadata.py
@@ -1,184 +1,184 @@
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
from typing import Iterator, List, Optional, Sequence, Tuple
import attr
from swh.loader.package.loader import (
BasePackageInfo,
PackageLoader,
RawExtrinsicMetadataCore,
)
from swh.model.hashutil import hash_to_bytes
from swh.model.identifiers import SWHID
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
MetadataTargetType,
RawExtrinsicMetadata,
Sha1Git,
)
from swh.storage import get_storage
from swh.loader.package import __version__
# Hex snapshot ids. FULL_SNAPSHOT_ID is the "snapshot_id" the tests below
# expect in the load status; EMPTY_SNAPSHOT_ID is not referenced in the
# visible part of this module (presumably the id of an empty snapshot).
EMPTY_SNAPSHOT_ID = "1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"
FULL_SNAPSHOT_ID = "4a9b608c9f01860a627237dd2409d1d50ec4b054"

# Authority used to query the storage for extrinsic metadata in the tests;
# MetadataTestLoader.get_metadata_authority returns a copy of it with
# metadata={}.
AUTHORITY = MetadataAuthority(
    type=MetadataAuthorityType.FORGE, url="http://example.org/",
)

# Origin url handed to the test loader and revision id its _load_revision
# returns, with the matching SWHID used as the metadata target.
ORIGIN_URL = "http://example.org/archive.tgz"
REVISION_ID = hash_to_bytes("8ff44f081d43176474b267de5451f2c2e88089d0")
REVISION_SWHID = SWHID(object_type="revision", object_id=REVISION_ID)

# Fetcher whose name is the dotted path of the MetadataTestLoader class.
# NOTE(review): presumably the package loader derives its fetcher the same
# way -- confirm in swh.loader.package.loader.
FETCHER = MetadataFetcher(
    name="swh.loader.package.tests.test_loader_metadata.MetadataTestLoader",
    version=__version__,
)

# Expected revision-level metadata: two entries differing in format, payload
# and discovery date (the second is one second after "now"). The dates are
# computed once, at import time.
REVISION_METADATA = [
    RawExtrinsicMetadata(
        type=MetadataTargetType.REVISION,
        id=REVISION_SWHID,
        discovery_date=datetime.datetime.now(),
        authority=AUTHORITY,
        fetcher=FETCHER,
        format="test-format1",
        metadata=b"foo bar",
        origin=ORIGIN_URL,
    ),
    RawExtrinsicMetadata(
        type=MetadataTargetType.REVISION,
        id=REVISION_SWHID,
        discovery_date=datetime.datetime.now() + datetime.timedelta(seconds=1),
        authority=AUTHORITY,
        fetcher=FETCHER,
        format="test-format2",
        metadata=b"bar baz",
        origin=ORIGIN_URL,
    ),
]

# Expected origin-level metadata: a single entry targeting ORIGIN_URL.
ORIGIN_METADATA = [
    RawExtrinsicMetadata(
        type=MetadataTargetType.ORIGIN,
        id=ORIGIN_URL,
        discovery_date=datetime.datetime.now(),
        authority=AUTHORITY,
        fetcher=FETCHER,
        format="test-format3",
        metadata=b"baz qux",
    ),
]
class MetadataTestLoader(PackageLoader[BasePackageInfo]):
    """Minimal package loader serving the canned metadata of this module.

    It exposes a single version, always resolves to REVISION_ID and attaches
    the REVISION_METADATA / ORIGIN_METADATA entries as extrinsic metadata.

    """

    def get_versions(self) -> Sequence[str]:
        """Return the only version known to this loader."""
        return ["v1.0.0"]

    def _load_revision(self, p_info: BasePackageInfo, origin) -> Optional[Sha1Git]:
        """Return the fixed revision id, whatever the package info is."""
        return REVISION_ID

    def get_metadata_authority(self):
        """Return a copy of AUTHORITY carrying an empty metadata dict."""
        return attr.evolve(AUTHORITY, metadata={})

    def get_package_info(self, version: str) -> Iterator[Tuple[str, BasePackageInfo]]:
        """Yield one package info carrying both revision metadata entries."""
        sources = (REVISION_METADATA[0], REVISION_METADATA[1])
        extrinsic_metadata = [
            RawExtrinsicMetadataCore(
                source.format, source.metadata, source.discovery_date
            )
            for source in sources
        ]
        package_info = BasePackageInfo(
            url=ORIGIN_URL,
            filename="archive.tgz",
            revision_extrinsic_metadata=extrinsic_metadata,
        )
        yield (version, package_info)

    def get_extrinsic_origin_metadata(self) -> List[RawExtrinsicMetadataCore]:
        """Return the first ORIGIN_METADATA entry in its core form."""
        origin_entry = ORIGIN_METADATA[0]
        core = RawExtrinsicMetadataCore(
            origin_entry.format, origin_entry.metadata, origin_entry.discovery_date
        )
        return [core]
def test_load_metadata(swh_config, caplog):
storage = get_storage("memory")
loader = MetadataTestLoader(ORIGIN_URL)
loader.storage = storage
load_status = loader.load()
assert load_status == {
"status": "eventful",
"snapshot_id": FULL_SNAPSHOT_ID,
}
result = storage.raw_extrinsic_metadata_get(
MetadataTargetType.REVISION, REVISION_SWHID, AUTHORITY,
)
- assert result["next_page_token"] is None
- assert result["results"] == REVISION_METADATA
+ assert result.next_page_token is None
+ assert result.results == REVISION_METADATA
result = storage.raw_extrinsic_metadata_get(
MetadataTargetType.ORIGIN, ORIGIN_URL, AUTHORITY,
)
- assert result["next_page_token"] is None
- assert result["results"] == ORIGIN_METADATA
+ assert result.next_page_token is None
+ assert result.results == ORIGIN_METADATA
assert caplog.text == ""
def test_existing_authority(swh_config, caplog):
storage = get_storage("memory")
loader = MetadataTestLoader(ORIGIN_URL)
loader.storage = storage
loader.config["create_authorities"] = False
storage.metadata_authority_add([attr.evolve(AUTHORITY, metadata={})])
load_status = loader.load()
assert load_status == {
"status": "eventful",
"snapshot_id": FULL_SNAPSHOT_ID,
}
result = storage.raw_extrinsic_metadata_get(
MetadataTargetType.REVISION, REVISION_SWHID, AUTHORITY,
)
- assert result["next_page_token"] is None
- assert result["results"] == REVISION_METADATA
+ assert result.next_page_token is None
+ assert result.results == REVISION_METADATA
assert caplog.text == ""
def test_existing_fetcher(swh_config, caplog):
storage = get_storage("memory")
loader = MetadataTestLoader(ORIGIN_URL)
loader.storage = storage
loader.config["create_fetchers"] = False
storage.metadata_fetcher_add([attr.evolve(FETCHER, metadata={})])
load_status = loader.load()
assert load_status == {
"status": "eventful",
"snapshot_id": FULL_SNAPSHOT_ID,
}
result = storage.raw_extrinsic_metadata_get(
MetadataTargetType.REVISION, REVISION_SWHID, AUTHORITY,
)
- assert result["next_page_token"] is None
- assert result["results"] == REVISION_METADATA
+ assert result.next_page_token is None
+ assert result.results == REVISION_METADATA
assert caplog.text == ""
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Fri, Jul 4, 12:47 PM (2 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3261280
Attached To
rDLDBASE Generic VCS/Package Loader
Event Timeline
Log In to Comment