Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/tests/test_init.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import pytest | import pytest | ||||
import os | import os | ||||
import subprocess | import subprocess | ||||
from swh.model.from_disk import DentryPerms | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Content, | |||||
Directory, | |||||
DirectoryEntry, | |||||
ObjectType, | |||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Person, | |||||
Release, | |||||
Revision, | |||||
RevisionType, | |||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | SnapshotBranch, | ||||
TargetType, | TargetType, | ||||
Timestamp, | |||||
TimestampWithTimezone, | |||||
) | ) | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.loader.tests import ( | from swh.loader.tests import ( | ||||
assert_last_visit_matches, | assert_last_visit_matches, | ||||
encode_target, | encode_target, | ||||
check_snapshot, | check_snapshot, | ||||
prepare_repository_from_archive, | prepare_repository_from_archive, | ||||
InconsistentAliasBranchError, | |||||
InexistentObjectsError, | |||||
) | ) | ||||
hash_hex = "43e45d56f88993aae6a0198013efa80716fd8920" | hash_hex = "43e45d56f88993aae6a0198013efa80716fd8920" | ||||
ORIGIN_VISIT = OriginVisit( | ORIGIN_VISIT = OriginVisit( | ||||
origin="some-url", | origin="some-url", | ||||
visit=1, | visit=1, | ||||
date=datetime.datetime.now(tz=datetime.timezone.utc), | date=datetime.datetime.now(tz=datetime.timezone.utc), | ||||
type="archive", | type="archive", | ||||
) | ) | ||||
ORIGIN_VISIT_STATUS = OriginVisitStatus( | ORIGIN_VISIT_STATUS = OriginVisitStatus( | ||||
origin="some-url", | origin="some-url", | ||||
visit=1, | visit=1, | ||||
date=datetime.datetime.now(tz=datetime.timezone.utc), | date=datetime.datetime.now(tz=datetime.timezone.utc), | ||||
status="full", | status="full", | ||||
snapshot=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), | snapshot=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), | ||||
metadata=None, | metadata=None, | ||||
) | ) | ||||
CONTENT = Content( | |||||
data=b"42\n", | |||||
length=3, | |||||
sha1=hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"), | |||||
sha1_git=hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"), | |||||
sha256=hash_to_bytes( | |||||
"673650f936cb3b0a2f93ce09d81be10748b1b203c19e8176b4eefc1964a0cf3a" | |||||
), | |||||
blake2s256=hash_to_bytes( | |||||
"d5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d" | |||||
), | |||||
status="visible", | |||||
) | |||||
ardumont: Still need to convert those into model objects (incoming heh ;) | |||||
DIRECTORY = Directory( | |||||
id=hash_to_bytes("34f335a750111ca0a8b64d8034faec9eedc396be"), | |||||
entries=tuple( | |||||
[ | |||||
DirectoryEntry( | |||||
name=b"foo", | |||||
type="file", | |||||
target=CONTENT.sha1_git, | |||||
perms=DentryPerms.content, | |||||
) | |||||
] | |||||
), | |||||
) | |||||
REVISION = Revision( | |||||
id=hash_to_bytes("066b1b62dbfa033362092af468bf6cfabec230e7"), | |||||
message=b"hello", | |||||
author=Person( | |||||
name=b"Nicolas Dandrimont", | |||||
email=b"nicolas@example.com", | |||||
fullname=b"Nicolas Dandrimont <nicolas@example.com> ", | |||||
), | |||||
date=TimestampWithTimezone( | |||||
timestamp=Timestamp(seconds=1234567890, microseconds=0), | |||||
offset=120, | |||||
negative_utc=False, | |||||
), | |||||
committer=Person( | |||||
name=b"St\xc3fano Zacchiroli", | |||||
email=b"stefano@example.com", | |||||
fullname=b"St\xc3fano Zacchiroli <stefano@example.com>", | |||||
), | |||||
committer_date=TimestampWithTimezone( | |||||
timestamp=Timestamp(seconds=1123456789, microseconds=0), | |||||
offset=0, | |||||
negative_utc=True, | |||||
), | |||||
parents=(), | |||||
type=RevisionType.GIT, | |||||
directory=DIRECTORY.id, | |||||
metadata={ | |||||
"checksums": {"sha1": "tarball-sha1", "sha256": "tarball-sha256",}, | |||||
"signed-off-by": "some-dude", | |||||
}, | |||||
extra_headers=( | |||||
(b"gpgsig", b"test123"), | |||||
(b"mergetag", b"foo\\bar"), | |||||
(b"mergetag", b"\x22\xaf\x89\x80\x01\x00"), | |||||
), | |||||
synthetic=True, | |||||
) | |||||
RELEASE = Release( | |||||
id=hash_to_bytes("3e9050196aa288264f2a9d279d6abab8b158448b"), | |||||
name=b"v0.0.2", | |||||
author=Person( | |||||
name=b"tony", email=b"tony@ardumont.fr", fullname=b"tony <tony@ardumont.fr>", | |||||
), | |||||
date=TimestampWithTimezone( | |||||
timestamp=Timestamp(seconds=1634336813, microseconds=0), | |||||
offset=0, | |||||
negative_utc=False, | |||||
), | |||||
target=REVISION.id, | |||||
target_type=ObjectType.REVISION, | |||||
message=b"yet another synthetic release", | |||||
synthetic=True, | |||||
) | |||||
SNAPSHOT = Snapshot( | |||||
id=hash_to_bytes("2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"), | |||||
branches={ | |||||
b"release/0.1.0": SnapshotBranch( | |||||
target=RELEASE.id, target_type=TargetType.RELEASE, | |||||
Not Done Inline Actions: Why .from_dict() instead of building them directly? vlorentz: Why `.from_dict()` instead of building them directly? | |||||
Done Inline Actions: I was eager to go test the function and then I forgot to refactor. Also, I initially wanted to reuse the storage_data objects and drop those. Thanks. ardumont: I was eager to go test the function and then I forgot to refactor.
Also, I initially wanted to… | |||||
), | |||||
b"HEAD": SnapshotBranch(target=REVISION.id, target_type=TargetType.REVISION,), | |||||
b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,), | |||||
b"evaluation": SnapshotBranch( # branch dedicated to not exist in storage | |||||
target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"), | |||||
target_type=TargetType.REVISION, | |||||
), | |||||
}, | |||||
) | |||||
@pytest.fixture | @pytest.fixture | ||||
def mock_storage(mocker): | def mock_storage(mocker): | ||||
mock_storage = mocker.patch("swh.loader.tests.origin_get_latest_visit_status") | mock_storage = mocker.patch("swh.loader.tests.origin_get_latest_visit_status") | ||||
mock_storage.return_value = ORIGIN_VISIT, ORIGIN_VISIT_STATUS | mock_storage.return_value = ORIGIN_VISIT, ORIGIN_VISIT_STATUS | ||||
return mock_storage | return mock_storage | ||||
def test_assert_last_visit_matches_raise(mock_storage, mocker): | def test_assert_last_visit_matches_raise(mock_storage, mocker): | ||||
▲ Show 20 Lines • Show All 134 Lines • ▼ Show 20 Lines | for hash_ in [hash_hex, hash_to_bytes(hash_hex)]: | ||||
actual_encode_target = encode_target(target) | actual_encode_target = encode_target(target) | ||||
assert actual_encode_target == { | assert actual_encode_target == { | ||||
"target_type": "revision", | "target_type": "revision", | ||||
"target": hash_to_bytes(hash_hex), | "target": hash_to_bytes(hash_hex), | ||||
} | } | ||||
def test_check_snapshot(swh_storage): | def test_check_snapshot(swh_storage): | ||||
"""Check snapshot should not raise when everything is fine""" | """Everything should be fine when snapshot is found and the snapshot reference up to the | ||||
snapshot = Snapshot( | revision exist in the storage. | ||||
id=hash_to_bytes("2498dbf535f882bc7f9a18fb16c9ad27fda7bab7"), | |||||
branches={ | |||||
b"master": SnapshotBranch( | |||||
target=hash_to_bytes(hash_hex), target_type=TargetType.REVISION, | |||||
), | |||||
}, | |||||
) | |||||
s = swh_storage.snapshot_add([snapshot]) | """ | ||||
# Create a consistent snapshot object tree (arborescence) in storage | |||||
found = False | |||||
for entry in DIRECTORY.entries: | |||||
if entry.target == CONTENT.sha1_git: | |||||
found = True | |||||
break | |||||
assert found is True | |||||
assert REVISION.directory == DIRECTORY.id | |||||
assert RELEASE.target == REVISION.id | |||||
for branch, target in SNAPSHOT.branches.items(): | |||||
if branch == b"alias": | |||||
assert target.target in SNAPSHOT.branches | |||||
elif branch == b"evaluation": | |||||
# this one does not exist and we are safelisting its check below | |||||
continue | |||||
else: | |||||
assert target.target in [REVISION.id, RELEASE.id] | |||||
swh_storage.content_add([CONTENT.to_dict()]) | |||||
swh_storage.directory_add([DIRECTORY.to_dict()]) | |||||
swh_storage.revision_add([REVISION.to_dict()]) | |||||
swh_storage.release_add([RELEASE.to_dict()]) | |||||
s = swh_storage.snapshot_add([SNAPSHOT.to_dict()]) | |||||
assert s == { | assert s == { | ||||
"snapshot:add": 1, | "snapshot:add": 1, | ||||
} | } | ||||
for snap in [snapshot, snapshot.to_dict()]: | for snap in [SNAPSHOT, SNAPSHOT.to_dict()]: | ||||
check_snapshot(snap, swh_storage) | # all should be fine! | ||||
check_snapshot( | |||||
snap, swh_storage, allowed_empty=[(TargetType.REVISION, b"evaluation")] | |||||
) | |||||
def test_check_snapshot_failures(swh_storage): | |||||
"""Failure scenarios: | |||||
def test_check_snapshot_failure(swh_storage): | 0. snapshot parameter is not a snapshot | ||||
"""check_snapshot should raise if something goes wrong""" | 1. snapshot id is correct but branches mismatched | ||||
2. snapshot id is not correct, it's not found in the storage | |||||
3. snapshot references an alias which does not exist | |||||
4. snapshot is found in storage, targeted revision does not exist | |||||
5. snapshot is found in storage, targeted release does not exist | |||||
The following are not dealt with yet: | |||||
6. snapshot is found in storage, targeted directory does not exist | |||||
7. snapshot is found in storage, targeted content does not exist | |||||
""" | |||||
snap_id_hex = "2498dbf535f882bc7f9a18fb16c9ad27fda7bab7" | snap_id_hex = "2498dbf535f882bc7f9a18fb16c9ad27fda7bab7" | ||||
snapshot = Snapshot( | snapshot = Snapshot( | ||||
id=hash_to_bytes(snap_id_hex), | id=hash_to_bytes(snap_id_hex), | ||||
branches={ | branches={ | ||||
b"master": SnapshotBranch( | b"master": SnapshotBranch( | ||||
target=hash_to_bytes(hash_hex), target_type=TargetType.REVISION, | target=hash_to_bytes(hash_hex), target_type=TargetType.REVISION, | ||||
), | ), | ||||
}, | }, | ||||
) | ) | ||||
s = swh_storage.snapshot_add([snapshot]) | s = swh_storage.snapshot_add([snapshot]) | ||||
assert s == { | assert s == { | ||||
"snapshot:add": 1, | "snapshot:add": 1, | ||||
} | } | ||||
unexpected_snapshot = { | unexpected_snapshot = { | ||||
"id": "2498dbf535f882bc7f9a18fb16c9ad27fda7bab7", # id is correct | "id": "2498dbf535f882bc7f9a18fb16c9ad27fda7bab7", # id is correct | ||||
"branches": { | "branches": { | ||||
"master": {"target": hash_hex, "target_type": "release",} # wrong branch | "master": {"target": hash_hex, "target_type": "release",} # wrong branch | ||||
}, | }, | ||||
} | } | ||||
# id is correct, the branch is wrong, that should raise nonetheless | # 0. not a Snapshot object, raise! | ||||
with pytest.raises(AssertionError, match="variable 'snapshot' must be a snapshot"): | |||||
check_snapshot(ORIGIN_VISIT, swh_storage) | |||||
# 1. snapshot id is correct but branches mismatched | |||||
for snap_id in [snap_id_hex, snapshot.id]: | for snap_id in [snap_id_hex, snapshot.id]: | ||||
with pytest.raises(AssertionError, match="Differing attributes"): | with pytest.raises(AssertionError, match="Differing attributes"): | ||||
unexpected_snapshot["id"] = snap_id | unexpected_snapshot["id"] = snap_id | ||||
check_snapshot(unexpected_snapshot, swh_storage) | check_snapshot(unexpected_snapshot, swh_storage) | ||||
# snapshot id which does not exist | # 2. snapshot id is not correct, it's not found in the storage | ||||
wrong_snap_id_hex = "999666f535f882bc7f9a18fb16c9ad27fda7bab7" | wrong_snap_id_hex = "999666f535f882bc7f9a18fb16c9ad27fda7bab7" | ||||
for snap_id in [wrong_snap_id_hex, hash_to_bytes(wrong_snap_id_hex)]: | for snap_id in [wrong_snap_id_hex, hash_to_bytes(wrong_snap_id_hex)]: | ||||
unexpected_snapshot["id"] = wrong_snap_id_hex | unexpected_snapshot["id"] = wrong_snap_id_hex | ||||
with pytest.raises(AssertionError, match="is not found"): | with pytest.raises(AssertionError, match="is not found"): | ||||
check_snapshot(unexpected_snapshot, swh_storage) | check_snapshot(unexpected_snapshot, swh_storage) | ||||
# not a Snapshot object, raise! | # 3. snapshot references an inexistent alias | ||||
with pytest.raises(AssertionError, match="variable 'snapshot' must be a snapshot"): | snapshot0 = Snapshot( | ||||
check_snapshot(ORIGIN_VISIT, swh_storage) | id=hash_to_bytes("123666f535f882bc7f9a18fb16c9ad27fda7bab7"), | ||||
branches={ | |||||
b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,), | |||||
}, | |||||
) | |||||
swh_storage.snapshot_add([snapshot0]) | |||||
with pytest.raises(InconsistentAliasBranchError, match="Alias branch HEAD"): | |||||
check_snapshot(snapshot0, swh_storage) | |||||
# 4. snapshot is found in storage, targeted revision does not exist | |||||
snapshot1 = Snapshot( | |||||
id=hash_to_bytes("456666f535f882bc7f9a18fb16c9ad27fda7bab7"), | |||||
branches={ | |||||
b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,), | |||||
b"HEAD": SnapshotBranch( | |||||
target=REVISION.id, target_type=TargetType.REVISION, | |||||
), | |||||
}, | |||||
) | |||||
swh_storage.snapshot_add([snapshot1]) | |||||
with pytest.raises(InexistentObjectsError, match="Branch/Revision"): | |||||
check_snapshot(snapshot1, swh_storage) | |||||
swh_storage.revision_add([REVISION.to_dict()]) | |||||
snapshot2 = Snapshot( | |||||
id=hash_to_bytes("789666f535f882bc7f9a18fb16c9ad27fda7bab7"), | |||||
branches={ | |||||
b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,), | |||||
b"HEAD": SnapshotBranch( | |||||
target=REVISION.id, target_type=TargetType.REVISION, | |||||
), | |||||
b"release/0.1.0": SnapshotBranch( | |||||
target=RELEASE.id, target_type=TargetType.RELEASE, | |||||
), | |||||
}, | |||||
) | |||||
swh_storage.snapshot_add([snapshot2]) | |||||
with pytest.raises(InexistentObjectsError, match="Branch/Release"): | |||||
check_snapshot(snapshot2, swh_storage) | |||||
Done Inline Actions: That's not needed, drop it. ardumont: That's not needed, drop it. | |||||
Done Inline Actions: done ardumont: done |
Still need to convert those into model objects (incoming heh ;)