Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/tests/test_loader.py
# Copyright (C) 2019-2021 The Software Heritage developers | # Copyright (C) 2019-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import hashlib | import hashlib | ||||
import logging | import logging | ||||
import string | import string | ||||
from typing import Optional | |||||
from unittest.mock import Mock, call, patch | from unittest.mock import Mock, call, patch | ||||
import attr | import attr | ||||
import pytest | import pytest | ||||
from swh.loader.core.loader import ( | from swh.loader.core.loader import ( | ||||
SENTRY_ORIGIN_URL_TAG_NAME, | SENTRY_ORIGIN_URL_TAG_NAME, | ||||
SENTRY_VISIT_TYPE_TAG_NAME, | SENTRY_VISIT_TYPE_TAG_NAME, | ||||
) | ) | ||||
from swh.loader.package.loader import BasePackageInfo, PackageLoader | from swh.loader.package.loader import BasePackageInfo, PackageLoader | ||||
from swh.loader.package.utils import EMPTY_AUTHOR | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Person, | Person, | ||||
Release, | Release, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
Sha1Git, | |||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | SnapshotBranch, | ||||
TargetType, | TargetType, | ||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
) | ) | ||||
from swh.model.model import ExtID | from swh.model.model import ExtID | ||||
from swh.model.model import ObjectType as ModelObjectType | from swh.model.model import ObjectType as ModelObjectType | ||||
from swh.model.swhids import CoreSWHID, ObjectType | from swh.model.swhids import CoreSWHID, ObjectType | ||||
Show All 16 Lines | class FakeStorage2(FakeStorage): | ||||
def origin_visit_add(self, visits): | def origin_visit_add(self, visits): | ||||
raise ValueError("We refuse to add an origin visit") | raise ValueError("We refuse to add an origin visit") | ||||
class StubPackageInfo(BasePackageInfo): | class StubPackageInfo(BasePackageInfo): | ||||
pass | pass | ||||
ORIGIN_URL = "https://example.org/package/example" | |||||
class StubPackageLoader(PackageLoader[StubPackageInfo]): | class StubPackageLoader(PackageLoader[StubPackageInfo]): | ||||
visit_type = "stub" | visit_type = "stub" | ||||
def get_versions(self): | def get_versions(self): | ||||
return ["v1.0", "v2.0", "v3.0", "v4.0"] | return ["v1.0", "v2.0", "v3.0", "v4.0"] | ||||
def get_package_info(self, version): | def get_package_info(self, version): | ||||
filename = f"example-{version}.tar.gz" | |||||
p_info = StubPackageInfo( | p_info = StubPackageInfo( | ||||
"http://example.org", f"example-{version}.tar", version=version | f"{ORIGIN_URL}/{filename}", | ||||
filename, | |||||
version=version, | |||||
) | ) | ||||
extid_type = "extid-type1" if version in ("v1.0", "v2.0") else "extid-type2" | extid_type = "extid-type1" if version in ("v1.0", "v2.0") else "extid-type2" | ||||
# Versions 1.0 and 2.0 have an extid of a given type, v3.0 has an extid | # Versions 1.0 and 2.0 have an extid of a given type, v3.0 has an extid | ||||
# of a different type | # of a different type | ||||
patch.object( | patch.object( | ||||
p_info, | p_info, | ||||
"extid", | "extid", | ||||
return_value=(extid_type, 0, f"extid-of-{version}".encode()), | return_value=(extid_type, 0, f"extid-of-{version}".encode()), | ||||
autospec=True, | autospec=True, | ||||
).start() | ).start() | ||||
yield (f"branch-{version}", p_info) | yield (f"branch-{version}", p_info) | ||||
def _load_release(self, p_info, origin): | def build_release( | ||||
return None | self, p_info: StubPackageInfo, uncompressed_path: str, directory: Sha1Git | ||||
) -> Optional[Release]: | |||||
msg = ( | |||||
f"Synthetic release for source package {p_info.url} " | |||||
f"version {p_info.version}\n" | |||||
) | |||||
return Release( | |||||
name=p_info.version.encode(), | |||||
message=msg.encode(), | |||||
date=None, | |||||
author=EMPTY_AUTHOR, | |||||
target_type=ModelObjectType.DIRECTORY, | |||||
target=directory, | |||||
synthetic=True, | |||||
) | |||||
def test_loader_origin_visit_success(swh_storage, requests_mock_datadir): | |||||
loader = StubPackageLoader(swh_storage, ORIGIN_URL) | |||||
assert loader.load() == { | |||||
"snapshot_id": "dcb9ecef64af73f2cdac7f5463cb6dece6b1db61", | |||||
"status": "eventful", | |||||
} | |||||
assert set(loader.last_snapshot().branches.keys()) == { | |||||
ardumont: what about? | |||||
anlambert: Better indeed, thanks! (marked as done) | |||||
f"branch-{version}".encode() for version in loader.get_versions() | |||||
} | |||||
def test_loader_origin_visit_failure(swh_storage): | def test_loader_origin_visit_failure(swh_storage): | ||||
"""Failure to add origin or origin visit should failed immediately""" | """Failure to add origin or origin visit should failed immediately""" | ||||
loader = StubPackageLoader(swh_storage, "some-url") | loader = StubPackageLoader(swh_storage, "some-url") | ||||
loader.storage = FakeStorage() | loader.storage = FakeStorage() | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
Show All 19 Lines | rel2 = Release( | ||||
name=b"bbbb", | name=b"bbbb", | ||||
message=b"bbbb", | message=b"bbbb", | ||||
target=target, | target=target, | ||||
target_type=ModelObjectType.DIRECTORY, | target_type=ModelObjectType.DIRECTORY, | ||||
synthetic=False, | synthetic=False, | ||||
) | ) | ||||
storage.release_add([rel1, rel2]) | storage.release_add([rel1, rel2]) | ||||
loader = StubPackageLoader(storage, "http://example.org/") | loader = StubPackageLoader(storage, ORIGIN_URL) | ||||
p_info = Mock(wraps=BasePackageInfo(None, None, None)) # type: ignore | p_info = Mock(wraps=BasePackageInfo(None, None, None)) # type: ignore | ||||
# The PackageInfo does not support extids | # The PackageInfo does not support extids | ||||
p_info.extid.return_value = None | p_info.extid.return_value = None | ||||
known_extids = {("extid-type", 0, b"extid-of-aaaa"): [rel1.swhid()]} | known_extids = {("extid-type", 0, b"extid-of-aaaa"): [rel1.swhid()]} | ||||
whitelist = {b"unused"} | whitelist = {b"unused"} | ||||
assert loader.resolve_object_from_extids(known_extids, p_info, whitelist) is None | assert loader.resolve_object_from_extids(known_extids, p_info, whitelist) is None | ||||
Show All 32 Lines | def test_resolve_object_from_extids_missing_target() -> None: | ||||
rel = Release( | rel = Release( | ||||
name=b"aaaa", | name=b"aaaa", | ||||
message=b"aaaa", | message=b"aaaa", | ||||
target=target, | target=target, | ||||
target_type=ModelObjectType.DIRECTORY, | target_type=ModelObjectType.DIRECTORY, | ||||
synthetic=False, | synthetic=False, | ||||
) | ) | ||||
loader = StubPackageLoader(storage, "http://example.org/") | loader = StubPackageLoader(storage, ORIGIN_URL) | ||||
p_info = Mock(wraps=BasePackageInfo(None, None, None)) # type: ignore | p_info = Mock(wraps=BasePackageInfo(None, None, None)) # type: ignore | ||||
known_extids = {("extid-type", 0, b"extid-of-aaaa"): [rel.swhid()]} | known_extids = {("extid-type", 0, b"extid-of-aaaa"): [rel.swhid()]} | ||||
p_info.extid.return_value = ("extid-type", 0, b"extid-of-aaaa") | p_info.extid.return_value = ("extid-type", 0, b"extid-of-aaaa") | ||||
whitelist = {rel.id} | whitelist = {rel.id} | ||||
# Targeted release is missing from the storage | # Targeted release is missing from the storage | ||||
assert loader.resolve_object_from_extids(known_extids, p_info, whitelist) is None | assert loader.resolve_object_from_extids(known_extids, p_info, whitelist) is None | ||||
storage.release_add([rel]) | storage.release_add([rel]) | ||||
# Targeted release now exists | # Targeted release now exists | ||||
assert ( | assert ( | ||||
loader.resolve_object_from_extids(known_extids, p_info, whitelist) | loader.resolve_object_from_extids(known_extids, p_info, whitelist) | ||||
== rel.swhid() | == rel.swhid() | ||||
) | ) | ||||
def test_load_get_known_extids() -> None: | def test_load_get_known_extids() -> None: | ||||
"""Checks PackageLoader.load() fetches known extids efficiently""" | """Checks PackageLoader.load() fetches known extids efficiently""" | ||||
storage = Mock(wraps=get_storage("memory")) | storage = Mock(wraps=get_storage("memory")) | ||||
loader = StubPackageLoader(storage, "http://example.org") | loader = StubPackageLoader(storage, ORIGIN_URL) | ||||
loader.load() | loader.load() | ||||
# Calls should be grouped by extid type | # Calls should be grouped by extid type | ||||
storage.extid_get_from_extid.assert_has_calls( | storage.extid_get_from_extid.assert_has_calls( | ||||
[ | [ | ||||
call("extid-type1", [b"extid-of-v1.0", b"extid-of-v2.0"], version=0), | call("extid-type1", [b"extid-of-v1.0", b"extid-of-v2.0"], version=0), | ||||
call("extid-type2", [b"extid-of-v3.0", b"extid-of-v4.0"], version=0), | call("extid-type2", [b"extid-of-v3.0", b"extid-of-v4.0"], version=0), | ||||
Show All 16 Lines | rels = [ | ||||
target=dir_swhid.object_id, | target=dir_swhid.object_id, | ||||
target_type=ModelObjectType.DIRECTORY, | target_type=ModelObjectType.DIRECTORY, | ||||
synthetic=True, | synthetic=True, | ||||
) | ) | ||||
for i in (1, 2, 3, 4) | for i in (1, 2, 3, 4) | ||||
] | ] | ||||
storage.release_add(rels[0:3]) | storage.release_add(rels[0:3]) | ||||
origin = "http://example.org" | origin = ORIGIN_URL | ||||
rel1_swhid = rels[0].swhid() | rel1_swhid = rels[0].swhid() | ||||
rel2_swhid = rels[1].swhid() | rel2_swhid = rels[1].swhid() | ||||
rel3_swhid = rels[2].swhid() | rel3_swhid = rels[2].swhid() | ||||
rel4_swhid = rels[3].swhid() | rel4_swhid = rels[3].swhid() | ||||
# Results of a previous load | # Results of a previous load | ||||
storage.extid_add( | storage.extid_add( | ||||
[ | [ | ||||
Show All 13 Lines | last_snapshot = Snapshot( | ||||
target_type=TargetType.RELEASE, target=rel3_swhid.object_id | target_type=TargetType.RELEASE, target=rel3_swhid.object_id | ||||
), | ), | ||||
} | } | ||||
) | ) | ||||
storage.snapshot_add([last_snapshot]) | storage.snapshot_add([last_snapshot]) | ||||
date = datetime.datetime.now(tz=datetime.timezone.utc) | date = datetime.datetime.now(tz=datetime.timezone.utc) | ||||
storage.origin_add([Origin(url=origin)]) | storage.origin_add([Origin(url=origin)]) | ||||
storage.origin_visit_add( | storage.origin_visit_add( | ||||
[OriginVisit(origin="http://example.org", visit=1, date=date, type="tar")] | [OriginVisit(origin=origin, visit=1, date=date, type="tar")] | ||||
) | ) | ||||
storage.origin_visit_status_add( | storage.origin_visit_status_add( | ||||
[ | [ | ||||
OriginVisitStatus( | OriginVisitStatus( | ||||
origin=origin, | origin=origin, | ||||
visit=1, | visit=1, | ||||
status="full", | status="full", | ||||
date=date, | date=date, | ||||
snapshot=last_snapshot.id, | snapshot=last_snapshot.id, | ||||
) | ) | ||||
] | ] | ||||
) | ) | ||||
loader = StubPackageLoader(storage, "http://example.org") | loader = StubPackageLoader(storage, origin) | ||||
patch.object( | patch.object( | ||||
loader, | loader, | ||||
"_load_release", | "_load_release", | ||||
return_value=(rel4_swhid.object_id, dir_swhid.object_id), | return_value=(rel4_swhid.object_id, dir_swhid.object_id), | ||||
autospec=True, | autospec=True, | ||||
).start() | ).start() | ||||
loader.load() | loader.load() | ||||
assert loader._load_release.mock_calls == [ # type: ignore | assert loader._load_release.mock_calls == [ # type: ignore | ||||
# v1.0: not loaded because there is already its (extid_type, extid, rel) | # v1.0: not loaded because there is already its (extid_type, extid, rel) | ||||
# in the storage. | # in the storage. | ||||
# v2.0: loaded, because there is already a similar extid, but different type | # v2.0: loaded, because there is already a similar extid, but different type | ||||
call( | call( | ||||
StubPackageInfo(origin, "example-v2.0.tar", "v2.0"), | StubPackageInfo( | ||||
f"{origin}/example-v2.0.tar.gz", "example-v2.0.tar.gz", "v2.0" | |||||
), | |||||
Origin(url=origin), | Origin(url=origin), | ||||
), | ), | ||||
# v3.0: loaded despite having an (extid_type, extid) in storage, because | # v3.0: loaded despite having an (extid_type, extid) in storage, because | ||||
# the target of the extid is not in the previous snapshot | # the target of the extid is not in the previous snapshot | ||||
call( | call( | ||||
StubPackageInfo(origin, "example-v3.0.tar", "v3.0"), | StubPackageInfo( | ||||
f"{origin}/example-v3.0.tar.gz", "example-v3.0.tar.gz", "v3.0" | |||||
), | |||||
Origin(url=origin), | Origin(url=origin), | ||||
), | ), | ||||
# v4.0: loaded, because there isn't its extid | # v4.0: loaded, because there isn't its extid | ||||
call( | call( | ||||
StubPackageInfo(origin, "example-v4.0.tar", "v4.0"), | StubPackageInfo( | ||||
f"{origin}/example-v4.0.tar.gz", "example-v4.0.tar.gz", "v4.0" | |||||
), | |||||
Origin(url=origin), | Origin(url=origin), | ||||
), | ), | ||||
] | ] | ||||
# then check the snapshot has all the branches. | # then check the snapshot has all the branches. | ||||
# versions 2.0 to 4.0 all point to rel4_swhid (instead of the value of the last | # versions 2.0 to 4.0 all point to rel4_swhid (instead of the value of the last | ||||
# snapshot), because they had to be loaded (mismatched extid), and the mocked | # snapshot), because they had to be loaded (mismatched extid), and the mocked | ||||
# _load_release always returns rel4_swhid. | # _load_release always returns rel4_swhid. | ||||
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | def test_load_upgrade_from_revision_extids(caplog): | ||||
and add them to the storage. | and add them to the storage. | ||||
Also checks that, if an extid exists pointing to a non-existent revision | Also checks that, if an extid exists pointing to a non-existent revision | ||||
(which should never happen, but you never know...), the release is loaded from | (which should never happen, but you never know...), the release is loaded from | ||||
scratch.""" | scratch.""" | ||||
storage = get_storage("memory") | storage = get_storage("memory") | ||||
origin = "http://example.org" | origin = ORIGIN_URL | ||||
dir1_swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=b"d" * 20) | dir1_swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=b"d" * 20) | ||||
dir2_swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=b"e" * 20) | dir2_swhid = CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=b"e" * 20) | ||||
date = TimestampWithTimezone.from_datetime( | date = TimestampWithTimezone.from_datetime( | ||||
datetime.datetime.now(tz=datetime.timezone.utc) | datetime.datetime.now(tz=datetime.timezone.utc) | ||||
) | ) | ||||
person = Person.from_fullname(b"Jane Doe <jdoe@example.org>") | person = Person.from_fullname(b"Jane Doe <jdoe@example.org>") | ||||
Show All 40 Lines | last_snapshot = Snapshot( | ||||
target_type=TargetType.REVISION, target=rev2_swhid.object_id | target_type=TargetType.REVISION, target=rev2_swhid.object_id | ||||
), | ), | ||||
} | } | ||||
) | ) | ||||
storage.snapshot_add([last_snapshot]) | storage.snapshot_add([last_snapshot]) | ||||
date = datetime.datetime.now(tz=datetime.timezone.utc) | date = datetime.datetime.now(tz=datetime.timezone.utc) | ||||
storage.origin_add([Origin(url=origin)]) | storage.origin_add([Origin(url=origin)]) | ||||
storage.origin_visit_add( | storage.origin_visit_add( | ||||
[OriginVisit(origin="http://example.org", visit=1, date=date, type="tar")] | [OriginVisit(origin=origin, visit=1, date=date, type="tar")] | ||||
) | ) | ||||
storage.origin_visit_status_add( | storage.origin_visit_status_add( | ||||
[ | [ | ||||
OriginVisitStatus( | OriginVisitStatus( | ||||
origin=origin, | origin=origin, | ||||
visit=1, | visit=1, | ||||
status="full", | status="full", | ||||
date=date, | date=date, | ||||
snapshot=last_snapshot.id, | snapshot=last_snapshot.id, | ||||
) | ) | ||||
] | ] | ||||
) | ) | ||||
loader = StubPackageLoader(storage, "http://example.org") | loader = StubPackageLoader(storage, origin) | ||||
patch.object( | patch.object( | ||||
loader, | loader, | ||||
"_load_release", | "_load_release", | ||||
return_value=(rel2_swhid.object_id, dir2_swhid.object_id), | return_value=(rel2_swhid.object_id, dir2_swhid.object_id), | ||||
autospec=True, | autospec=True, | ||||
).start() | ).start() | ||||
patch.object( | patch.object( | ||||
loader, | loader, | ||||
Show All 10 Lines | def test_load_upgrade_from_revision_extids(caplog): | ||||
(record,) = caplog.records | (record,) = caplog.records | ||||
assert record.levelname == "ERROR" | assert record.levelname == "ERROR" | ||||
assert "Failed to upgrade branch branch-v2.0" in record.message | assert "Failed to upgrade branch branch-v2.0" in record.message | ||||
assert loader._load_release.mock_calls == [ | assert loader._load_release.mock_calls == [ | ||||
# v1.0: not loaded because there is already a revision matching it | # v1.0: not loaded because there is already a revision matching it | ||||
# v2.0: loaded, as the revision is missing from the storage even though there | # v2.0: loaded, as the revision is missing from the storage even though there | ||||
# is an extid | # is an extid | ||||
call(StubPackageInfo(origin, "example-v2.0.tar", "v2.0"), Origin(url=origin)), | call( | ||||
StubPackageInfo( | |||||
f"{origin}/example-v2.0.tar.gz", "example-v2.0.tar.gz", "v2.0" | |||||
), | |||||
Origin(url=origin), | |||||
), | |||||
# v3.0: loaded (did not exist yet) | # v3.0: loaded (did not exist yet) | ||||
call(StubPackageInfo(origin, "example-v3.0.tar", "v3.0"), Origin(url=origin)), | call( | ||||
StubPackageInfo( | |||||
f"{origin}/example-v3.0.tar.gz", "example-v3.0.tar.gz", "v3.0" | |||||
), | |||||
Origin(url=origin), | |||||
), | |||||
] | ] | ||||
snapshot = Snapshot( | snapshot = Snapshot( | ||||
branches={ | branches={ | ||||
b"branch-v1.0": SnapshotBranch( | b"branch-v1.0": SnapshotBranch( | ||||
target_type=TargetType.RELEASE, target=rel1_swhid.object_id | target_type=TargetType.RELEASE, target=rel1_swhid.object_id | ||||
), | ), | ||||
b"branch-v2.0": SnapshotBranch( | b"branch-v2.0": SnapshotBranch( | ||||
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines | |||||
class StubPackageLoaderWithError(StubPackageLoader): | class StubPackageLoaderWithError(StubPackageLoader): | ||||
def get_versions(self, *args, **kwargs): | def get_versions(self, *args, **kwargs): | ||||
raise Exception("error") | raise Exception("error") | ||||
def test_loader_sentry_tags_on_error(swh_storage, sentry_events): | def test_loader_sentry_tags_on_error(swh_storage, sentry_events): | ||||
origin_url = "http://example.org/package/name" | origin_url = ORIGIN_URL | ||||
loader = StubPackageLoaderWithError(swh_storage, origin_url) | loader = StubPackageLoaderWithError(swh_storage, origin_url) | ||||
loader.load() | loader.load() | ||||
sentry_tags = sentry_events[0]["tags"] | sentry_tags = sentry_events[0]["tags"] | ||||
assert sentry_tags.get(SENTRY_ORIGIN_URL_TAG_NAME) == origin_url | assert sentry_tags.get(SENTRY_ORIGIN_URL_TAG_NAME) == origin_url | ||||
assert ( | assert ( | ||||
sentry_tags.get(SENTRY_VISIT_TYPE_TAG_NAME) | sentry_tags.get(SENTRY_VISIT_TYPE_TAG_NAME) | ||||
== StubPackageLoaderWithError.visit_type | == StubPackageLoaderWithError.visit_type | ||||
) | ) |
what about?