Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/storage_tests.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import defaultdict | from collections import defaultdict | ||||
import datetime | import datetime | ||||
from datetime import timedelta | from datetime import timedelta | ||||
import inspect | import inspect | ||||
import itertools | import itertools | ||||
import math | import math | ||||
import random | import random | ||||
import re | |||||
from typing import Any, ClassVar, Dict, Iterator, Optional | from typing import Any, ClassVar, Dict, Iterator, Optional | ||||
import attr | import attr | ||||
from hypothesis import HealthCheck, given, settings, strategies | from hypothesis import HealthCheck, given, settings, strategies | ||||
import pytest | import pytest | ||||
from swh.model import from_disk | from swh.model import from_disk | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.hypothesis_strategies import objects | from swh.model.hypothesis_strategies import objects | ||||
from swh.model.identifiers import SWHID | from swh.model.identifiers import SWHID | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Content, | Content, | ||||
Directory, | Directory, | ||||
MetadataTargetType, | MetadataTargetType, | ||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Person, | Person, | ||||
Revision, | Revision, | ||||
SkippedContent, | SkippedContent, | ||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | |||||
TargetType, | TargetType, | ||||
) | ) | ||||
from swh.storage import get_storage | from swh.storage import get_storage | ||||
from swh.storage.common import origin_url_to_sha1 as sha1 | from swh.storage.common import origin_url_to_sha1 as sha1 | ||||
from swh.storage.exc import HashCollision, StorageArgumentException | from swh.storage.exc import HashCollision, StorageArgumentException | ||||
from swh.storage.interface import ListOrder, PagedResult, StorageInterface | from swh.storage.interface import ListOrder, PagedResult, StorageInterface | ||||
from swh.storage.utils import content_hex_hashes, now, round_to_milliseconds | from swh.storage.utils import content_hex_hashes, now, round_to_milliseconds | ||||
▲ Show 20 Lines • Show All 2,868 Lines • ▼ Show 20 Lines | def test_snapshot_add_get_branch_by_type(self, swh_storage, sample_data): | ||||
target_types=["alias"], | target_types=["alias"], | ||||
branches_from=alias1, | branches_from=alias1, | ||||
branches_count=1, | branches_count=1, | ||||
)["branches"] | )["branches"] | ||||
assert len(branches) == 1 | assert len(branches) == 1 | ||||
assert alias1 in branches | assert alias1 in branches | ||||
def test_snapshot_add_get_by_branches_name_pattern(self, swh_storage, sample_data):
    """Check that ``snapshot_get_branches`` filters branches by name pattern.

    A snapshot with four revision branches and one dangling branch is added,
    then queried with several regular expressions passed as
    ``branches_name_pattern``. For each pattern, the names returned by the
    storage must be exactly the names that the same pattern matches locally.
    """
    branches_by_name = {
        b"refs/heads/master": SnapshotBranch(
            target=sample_data.revision.id, target_type=TargetType.REVISION,
        ),
        b"refs/heads/incoming": SnapshotBranch(
            target=sample_data.revision.id, target_type=TargetType.REVISION,
        ),
        b"refs/pull/1": SnapshotBranch(
            target=sample_data.revision.id, target_type=TargetType.REVISION,
        ),
        b"refs/pull/2": SnapshotBranch(
            target=sample_data.revision.id, target_type=TargetType.REVISION,
        ),
        b"dangling": None,
    }
    snapshot = Snapshot(branches=branches_by_name)
    swh_storage.snapshot_add([snapshot])

    cases = (
        ("pull", 2),
        ("incoming$", 1),
        ("^dangling$", 1),
        # does not contain heads
        ("^((?!heads).)*$", 3),
        # contains refs but not master
        ("^(?:(?!(?:master)).)*(?:refs)(?:(?!(?:master)).)*$", 3),
    )
    for regexp, nb_results in cases:
        pattern = re.compile(regexp)
        expected_names = {
            name for name in branches_by_name if pattern.search(name.decode("utf-8"))
        }
        # Sanity check on the test data itself: the hand-written count must
        # agree with what the pattern selects locally.
        assert len(expected_names) == nb_results

        branches = swh_storage.snapshot_get_branches(
            snapshot.id, branches_name_pattern=regexp
        )["branches"]

        # Compare the exact set of names rather than only a count, so that a
        # failure reports which branches are missing or unexpected.
        assert set(branches) == expected_names
def test_snapshot_add_get_by_branches_name_pattern_filtered_paginated(
    self, swh_storage, sample_data
):
    """Check name-pattern filtering combined with target-type filter and paging.

    The snapshot holds, for each of the directory and revision target types,
    ten branches whose name contains ``pattern`` and ten that do not. For each
    target type the matching branches are fetched in two pages and each page
    is compared against the exact slice of expected names.
    """
    pattern = "foo"
    nb_branches_by_target_type = 10

    snapshot_branches = {}
    for i in range(nb_branches_by_target_type):
        snapshot_branches[f"branch/directory/bar{i}".encode()] = SnapshotBranch(
            target=sample_data.directory.id, target_type=TargetType.DIRECTORY,
        )
        snapshot_branches[f"branch/revision/bar{i}".encode()] = SnapshotBranch(
            target=sample_data.revision.id, target_type=TargetType.REVISION,
        )
        snapshot_branches[f"branch/directory/{pattern}{i}".encode()] = SnapshotBranch(
            target=sample_data.directory.id, target_type=TargetType.DIRECTORY,
        )
        snapshot_branches[f"branch/revision/{pattern}{i}".encode()] = SnapshotBranch(
            target=sample_data.revision.id, target_type=TargetType.REVISION,
        )

    snapshot = Snapshot(branches=snapshot_branches)
    swh_storage.snapshot_add([snapshot])

    branches_count = nb_branches_by_target_type // 2

    for target_type in (
        TargetType.DIRECTORY,
        TargetType.REVISION,
    ):
        target_type_str = target_type.value
        # Indices are single digits, so sorting the encoded names keeps them
        # in index order; the page boundaries below rely on that ordering.
        expected_names = sorted(
            f"branch/{target_type_str}/{pattern}{i}".encode()
            for i in range(nb_branches_by_target_type)
        )

        first_page = swh_storage.snapshot_get_branches(
            snapshot.id,
            branches_name_pattern=pattern,
            target_types=[target_type_str],
            branches_count=branches_count,
        )
        # Exact comparison: catches bogus or missing branches, and a failure
        # shows the differing names instead of a bare length mismatch.
        assert set(first_page["branches"]) == set(expected_names[:branches_count])
        for branch_data in first_page["branches"].values():
            assert branch_data.target_type == target_type
        assert first_page["next_branch"] == expected_names[branches_count]

        # No branches_count here: the second request fetches everything that
        # remains after the first page.
        second_page = swh_storage.snapshot_get_branches(
            snapshot.id,
            branches_name_pattern=pattern,
            target_types=[target_type_str],
            branches_from=first_page["next_branch"],
        )
        assert set(second_page["branches"]) == set(expected_names[branches_count:])
        for branch_data in second_page["branches"].values():
            assert branch_data.target_type == target_type
        assert second_page["next_branch"] is None
def test_snapshot_add_get(self, swh_storage, sample_data): | def test_snapshot_add_get(self, swh_storage, sample_data): | ||||
snapshot = sample_data.snapshot | snapshot = sample_data.snapshot | ||||
origin = sample_data.origin | origin = sample_data.origin | ||||
swh_storage.origin_add([origin]) | swh_storage.origin_add([origin]) | ||||
visit = OriginVisit( | visit = OriginVisit( | ||||
origin=origin.url, | origin=origin.url, | ||||
date=sample_data.date_visit1, | date=sample_data.date_visit1, | ||||
type=sample_data.type_visit1, | type=sample_data.type_visit1, | ||||
) | ) | ||||
ov1 = swh_storage.origin_visit_add([visit])[0] | ov1 = swh_storage.origin_visit_add([visit])[0] | ||||
swh_storage.snapshot_add([snapshot]) | swh_storage.snapshot_add([snapshot]) | ||||
swh_storage.origin_visit_status_add( | swh_storage.origin_visit_status_add( | ||||
[ | [ | ||||
Not Done Inline Actionsditto vlorentz: ditto | |||||
Done Inline Actionsack anlambert: ack | |||||
OriginVisitStatus( | OriginVisitStatus( | ||||
origin=origin.url, | origin=origin.url, | ||||
visit=ov1.visit, | visit=ov1.visit, | ||||
date=now(), | date=now(), | ||||
status="ongoing", | status="ongoing", | ||||
snapshot=snapshot.id, | snapshot=snapshot.id, | ||||
) | ) | ||||
] | ] | ||||
▲ Show 20 Lines • Show All 996 Lines • Show Last 20 Lines |
This could be improved: it doesn't check that the same branch isn't returned multiple times, or that no bogus branches are returned.
What about something like this?
It also makes pytest give better errors, showing which branches are missing or unexpected instead of just a count mismatch.