Changeset View
Changeset View
Standalone View
Standalone View
swh/web/tests/conftest.py
# Copyright (C) 2018-2021 The Software Heritage developers | # Copyright (C) 2018-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from datetime import timedelta | from datetime import timedelta | ||||
import functools | |||||
import json | import json | ||||
import os | import os | ||||
import random | import random | ||||
import shutil | import shutil | ||||
from subprocess import PIPE, run | from subprocess import PIPE, run | ||||
import sys | import sys | ||||
from typing import Any, Dict, List, Optional | from typing import Any, Dict, List, Optional | ||||
▲ Show 20 Lines • Show All 232 Lines • ▼ Show 20 Lines | def empty_content(): | ||||
"""Fixture returning the empty content ingested into the test archive. | """Fixture returning the empty content ingested into the test archive. | ||||
""" | """ | ||||
empty_content = Content.from_data(data=b"").to_dict() | empty_content = Content.from_data(data=b"").to_dict() | ||||
for algo in DEFAULT_ALGORITHMS: | for algo in DEFAULT_ALGORITHMS: | ||||
empty_content[algo] = hash_to_hex(empty_content[algo]) | empty_content[algo] = hash_to_hex(empty_content[algo]) | ||||
return empty_content | return empty_content | ||||
@pytest.fixture(scope="function") | @functools.lru_cache(maxsize=None) | ||||
def content_text(tests_data): | def _content_text(): | ||||
""" | return list( | ||||
Fixture returning a random textual content ingested into the test archive. | |||||
""" | |||||
return random.choice( | |||||
list( | |||||
filter( | filter( | ||||
lambda c: c["mimetype"].startswith("text/"), | lambda c: c["mimetype"].startswith("text/"), | ||||
_known_swh_objects(tests_data, "contents"), | _known_swh_objects(get_tests_data(), "contents"), | ||||
) | |||||
) | ) | ||||
) | ) | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def content_text_non_utf8(tests_data): | def content_text(): | ||||
"""Fixture returning a random textual content not encoded to UTF-8 ingested | """ | ||||
into the test archive. | Fixture returning a random textual content ingested into the test archive. | ||||
""" | """ | ||||
return random.choice( | return random.choice(_content_text()) | ||||
list( | |||||
@functools.lru_cache(maxsize=None) | |||||
def _content_text_non_utf8(): | |||||
return list( | |||||
filter( | filter( | ||||
lambda c: c["mimetype"].startswith("text/") | lambda c: c["mimetype"].startswith("text/") | ||||
and c["encoding"] not in ("utf-8", "us-ascii"), | and c["encoding"] not in ("utf-8", "us-ascii"), | ||||
_known_swh_objects(tests_data, "contents"), | _known_swh_objects(get_tests_data(), "contents"), | ||||
) | |||||
) | ) | ||||
) | ) | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def content_application_no_highlight(tests_data): | def content_text_non_utf8(): | ||||
"""Fixture returning a random textual content with mimetype | """Fixture returning a random textual content not encoded to UTF-8 ingested | ||||
starting with application/ and no detected programming language to | into the test archive. | ||||
highlight ingested into the test archive. | |||||
""" | """ | ||||
return random.choice( | return random.choice(_content_text_non_utf8()) | ||||
list( | |||||
@functools.lru_cache(maxsize=None) | |||||
def _content_application_no_highlight(): | |||||
return list( | |||||
filter( | filter( | ||||
lambda c: c["mimetype"].startswith("application/") | lambda c: c["mimetype"].startswith("application/") | ||||
and c["encoding"] != "binary" | and c["encoding"] != "binary" | ||||
and c["hljs_language"] == "nohighlight", | and c["hljs_language"] == "nohighlight", | ||||
_known_swh_objects(tests_data, "contents"), | _known_swh_objects(get_tests_data(), "contents"), | ||||
) | |||||
) | ) | ||||
) | ) | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def content_text_no_highlight(tests_data): | def content_application_no_highlight(): | ||||
"""Fixture returning a random textual content with no detected | """Fixture returning a random textual content with mimetype | ||||
programming language to highlight ingested into the test archive. | starting with application/ and no detected programming language to | ||||
highlight ingested into the test archive. | |||||
""" | """ | ||||
return random.choice( | return random.choice(_content_application_no_highlight()) | ||||
list( | |||||
@functools.lru_cache(maxsize=None) | |||||
def _content_text_no_highlight(): | |||||
return list( | |||||
filter( | filter( | ||||
lambda c: c["mimetype"].startswith("text/") | lambda c: c["mimetype"].startswith("text/") | ||||
and c["hljs_language"] == "nohighlight", | and c["hljs_language"] == "nohighlight", | ||||
_known_swh_objects(tests_data, "contents"), | _known_swh_objects(get_tests_data(), "contents"), | ||||
) | |||||
) | ) | ||||
) | ) | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def content_image_type(tests_data): | def content_text_no_highlight(): | ||||
"""Fixture returning a random image content ingested into the test archive. | """Fixture returning a random textual content with no detected | ||||
programming language to highlight ingested into the test archive. | |||||
""" | """ | ||||
return random.choice( | return random.choice(_content_text_no_highlight()) | ||||
list( | |||||
@functools.lru_cache(maxsize=None) | |||||
def _content_image_type(): | |||||
return list( | |||||
filter( | filter( | ||||
lambda c: c["mimetype"] in browsers_supported_image_mimes, | lambda c: c["mimetype"] in browsers_supported_image_mimes, | ||||
_known_swh_objects(tests_data, "contents"), | _known_swh_objects(get_tests_data(), "contents"), | ||||
) | |||||
) | ) | ||||
) | ) | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def content_unsupported_image_type_rendering(tests_data): | def content_image_type(): | ||||
"""Fixture returning a random image content ingested into the test archive that | """Fixture returning a random image content ingested into the test archive. | ||||
can not be rendered by browsers. | |||||
""" | """ | ||||
return random.choice( | return random.choice(_content_image_type()) | ||||
list( | |||||
@functools.lru_cache(maxsize=None) | |||||
def _content_unsupported_image_type_rendering(): | |||||
return list( | |||||
filter( | filter( | ||||
lambda c: c["mimetype"].startswith("image/") | lambda c: c["mimetype"].startswith("image/") | ||||
and c["mimetype"] not in browsers_supported_image_mimes, | and c["mimetype"] not in browsers_supported_image_mimes, | ||||
_known_swh_objects(tests_data, "contents"), | _known_swh_objects(get_tests_data(), "contents"), | ||||
) | |||||
) | ) | ||||
) | ) | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def content_utf8_detected_as_binary(tests_data): | def content_unsupported_image_type_rendering(): | ||||
"""Fixture returning a random textual content detected as binary | """Fixture returning a random image content ingested into the test archive that | ||||
by libmagic while they are valid UTF-8 encoded files. | can not be rendered by browsers. | ||||
""" | """ | ||||
return random.choice(_content_unsupported_image_type_rendering()) | |||||
@functools.lru_cache(maxsize=None) | |||||
def _content_utf8_detected_as_binary(): | |||||
def utf8_binary_detected(content): | def utf8_binary_detected(content): | ||||
if content["encoding"] != "binary": | if content["encoding"] != "binary": | ||||
return False | return False | ||||
try: | try: | ||||
content["raw_data"].decode("utf-8") | content["raw_data"].decode("utf-8") | ||||
except Exception: | except Exception: | ||||
return False | return False | ||||
else: | else: | ||||
return True | return True | ||||
return random.choice( | return list( | ||||
list(filter(utf8_binary_detected, _known_swh_objects(tests_data, "contents"))) | filter(utf8_binary_detected, _known_swh_objects(get_tests_data(), "contents")) | ||||
) | ) | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def content_utf8_detected_as_binary(): | |||||
"""Fixture returning a random textual content detected as binary | |||||
by libmagic while they are valid UTF-8 encoded files. | |||||
""" | |||||
return random.choice(_content_utf8_detected_as_binary()) | |||||
@pytest.fixture(scope="function") | |||||
def contents_with_ctags(): | def contents_with_ctags(): | ||||
""" | """ | ||||
Fixture returning contents ingested into the test archive. | Fixture returning contents ingested into the test archive. | ||||
Those contents are ctags compatible, that is, running ctags on them yields results. | Those contents are ctags compatible, that is, running ctags on them yields results. | ||||
""" | """ | ||||
return { | return { | ||||
"sha1s": [ | "sha1s": [ | ||||
"0ab37c02043ebff946c1937523f60aadd0844351", | "0ab37c02043ebff946c1937523f60aadd0844351", | ||||
Show All 19 Lines | |||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def directory(tests_data): | def directory(tests_data): | ||||
"""Fixture returning a random directory ingested into the test archive. | """Fixture returning a random directory ingested into the test archive. | ||||
""" | """ | ||||
return random.choice(_known_swh_objects(tests_data, "directories")) | return random.choice(_known_swh_objects(tests_data, "directories")) | ||||
def _directory_with_entry_type(tests_data, type_): | @functools.lru_cache(maxsize=None) | ||||
return random.choice( | def _directory_with_entry_type(type_): | ||||
list( | tests_data = get_tests_data() | ||||
return list( | |||||
filter( | filter( | ||||
lambda d: any( | lambda d: any( | ||||
[ | [ | ||||
e["type"] == type_ | e["type"] == type_ | ||||
for e in list( | for e in list(tests_data["storage"].directory_ls(hash_to_bytes(d))) | ||||
tests_data["storage"].directory_ls(hash_to_bytes(d)) | |||||
) | |||||
] | ] | ||||
), | ), | ||||
_known_swh_objects(tests_data, "directories"), | _known_swh_objects(tests_data, "directories"), | ||||
) | ) | ||||
) | ) | ||||
) | |||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def directory_with_subdirs(tests_data): | def directory_with_subdirs(): | ||||
"""Fixture returning a random directory containing sub directories ingested | """Fixture returning a random directory containing sub directories ingested | ||||
into the test archive. | into the test archive. | ||||
""" | """ | ||||
return _directory_with_entry_type(tests_data, "dir") | return random.choice(_directory_with_entry_type("dir")) | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def directory_with_files(tests_data): | def directory_with_files(): | ||||
"""Fixture returning a random directory containing at least one regular file. | """Fixture returning a random directory containing at least one regular file. | ||||
""" | """ | ||||
return _directory_with_entry_type(tests_data, "file") | return random.choice(_directory_with_entry_type("file")) | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def unknown_directory(tests_data): | def unknown_directory(tests_data): | ||||
"""Fixture returning a random directory not ingested into the test archive. | """Fixture returning a random directory not ingested into the test archive. | ||||
""" | """ | ||||
while True: | while True: | ||||
new_directory = random_sha1() | new_directory = random_sha1() | ||||
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | def _get_origin_dfs_revisions_walker(tests_data): | ||||
if snapshot.branches[b"HEAD"].target_type.value == "alias": | if snapshot.branches[b"HEAD"].target_type.value == "alias": | ||||
target = snapshot.branches[b"HEAD"].target | target = snapshot.branches[b"HEAD"].target | ||||
head = snapshot.branches[target].target | head = snapshot.branches[target].target | ||||
else: | else: | ||||
head = snapshot.branches[b"HEAD"].target | head = snapshot.branches[b"HEAD"].target | ||||
return get_revisions_walker("dfs", storage, head) | return get_revisions_walker("dfs", storage, head) | ||||
@pytest.fixture(scope="function") | @functools.lru_cache(maxsize=None) | ||||
def ancestor_revisions(tests_data): | def _ancestor_revisions_data(): | ||||
"""Fixture returning a pair of revisions ingested into the test archive | |||||
with an ancestor relation. | |||||
""" | |||||
# get a dfs revisions walker for one of the origins | # get a dfs revisions walker for one of the origins | ||||
# loaded into the test archive | # loaded into the test archive | ||||
revisions_walker = _get_origin_dfs_revisions_walker(tests_data) | revisions_walker = _get_origin_dfs_revisions_walker(get_tests_data()) | ||||
master_revisions = [] | master_revisions = [] | ||||
children = defaultdict(list) | children = defaultdict(list) | ||||
init_rev_found = False | init_rev_found = False | ||||
# get revisions only authored in the master branch | # get revisions only authored in the master branch | ||||
for rev in revisions_walker: | for rev in revisions_walker: | ||||
for rev_p in rev["parents"]: | for rev_p in rev["parents"]: | ||||
children[rev_p].append(rev["id"]) | children[rev_p].append(rev["id"]) | ||||
if not init_rev_found: | if not init_rev_found: | ||||
master_revisions.append(rev) | master_revisions.append(rev) | ||||
if not rev["parents"]: | if not rev["parents"]: | ||||
init_rev_found = True | init_rev_found = True | ||||
return master_revisions, children | |||||
@pytest.fixture(scope="function") | |||||
def ancestor_revisions(): | |||||
"""Fixture returning a pair of revisions ingested into the test archive | |||||
with an ancestor relation. | |||||
""" | |||||
master_revisions, children = _ancestor_revisions_data() | |||||
# head revision | # head revision | ||||
root_rev = master_revisions[0] | root_rev = master_revisions[0] | ||||
# pick a random revision, different from head, only authored | # pick a random revision, different from head, only authored | ||||
# in the master branch | # in the master branch | ||||
ancestor_rev_idx = random.choice(list(range(1, len(master_revisions) - 1))) | ancestor_rev_idx = random.choice(list(range(1, len(master_revisions) - 1))) | ||||
ancestor_rev = master_revisions[ancestor_rev_idx] | ancestor_rev = master_revisions[ancestor_rev_idx] | ||||
ancestor_child_revs = children[ancestor_rev["id"]] | ancestor_child_revs = children[ancestor_rev["id"]] | ||||
return { | return { | ||||
"sha1_git_root": hash_to_hex(root_rev["id"]), | "sha1_git_root": hash_to_hex(root_rev["id"]), | ||||
"sha1_git": hash_to_hex(ancestor_rev["id"]), | "sha1_git": hash_to_hex(ancestor_rev["id"]), | ||||
"children": [hash_to_hex(r) for r in ancestor_child_revs], | "children": [hash_to_hex(r) for r in ancestor_child_revs], | ||||
} | } | ||||
@pytest.fixture(scope="function") | @functools.lru_cache(maxsize=None) | ||||
def non_ancestor_revisions(tests_data): | def _non_ancestor_revisions_data(): | ||||
"""Fixture returning a pair of revisions ingested into the test archive | |||||
with no ancestor relation. | |||||
""" | |||||
# get a dfs revisions walker for one of the origins | # get a dfs revisions walker for one of the origins | ||||
# loaded into the test archive | # loaded into the test archive | ||||
revisions_walker = _get_origin_dfs_revisions_walker(tests_data) | revisions_walker = _get_origin_dfs_revisions_walker(get_tests_data()) | ||||
merge_revs = [] | merge_revs = [] | ||||
children = defaultdict(list) | children = defaultdict(list) | ||||
# get all merge revisions | # get all merge revisions | ||||
for rev in revisions_walker: | for rev in revisions_walker: | ||||
if len(rev["parents"]) > 1: | if len(rev["parents"]) > 1: | ||||
merge_revs.append(rev) | merge_revs.append(rev) | ||||
for rev_p in rev["parents"]: | for rev_p in rev["parents"]: | ||||
children[rev_p].append(rev["id"]) | children[rev_p].append(rev["id"]) | ||||
return merge_revs, children | |||||
@pytest.fixture(scope="function") | |||||
def non_ancestor_revisions(): | |||||
"""Fixture returning a pair of revisions ingested into the test archive | |||||
with no ancestor relation. | |||||
""" | |||||
merge_revs, children = _non_ancestor_revisions_data() | |||||
# find a merge revision whose parents each have exactly one child revision | # find a merge revision whose parents each have exactly one child revision | ||||
random.shuffle(merge_revs) | random.shuffle(merge_revs) | ||||
selected_revs = None | selected_revs = None | ||||
for merge_rev in merge_revs: | for merge_rev in merge_revs: | ||||
if all(len(children[rev_p]) == 1 for rev_p in merge_rev["parents"]): | if all(len(children[rev_p]) == 1 for rev_p in merge_rev["parents"]): | ||||
selected_revs = merge_rev["parents"] | selected_revs = merge_rev["parents"] | ||||
return { | return { | ||||
▲ Show 20 Lines • Show All 62 Lines • ▼ Show 20 Lines | |||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def origin(tests_data): | def origin(tests_data): | ||||
"""Fixture returning a random origin ingested into the test archive. | """Fixture returning a random origin ingested into the test archive. | ||||
""" | """ | ||||
return random.choice(_known_swh_objects(tests_data, "origins")) | return random.choice(_known_swh_objects(tests_data, "origins")) | ||||
@pytest.fixture(scope="function") | @functools.lru_cache(maxsize=None) | ||||
def origin_with_multiple_visits(tests_data): | def _origin_with_multiple_visits(): | ||||
"""Fixture returning a random origin with multiple visits ingested | tests_data = get_tests_data() | ||||
into the test archive. | |||||
""" | |||||
origins = [] | origins = [] | ||||
storage = tests_data["storage"] | storage = tests_data["storage"] | ||||
for origin in tests_data["origins"]: | for origin in tests_data["origins"]: | ||||
visit_page = storage.origin_visit_get(origin["url"]) | visit_page = storage.origin_visit_get(origin["url"]) | ||||
if len(visit_page.results) > 1: | if len(visit_page.results) > 1: | ||||
origins.append(origin) | origins.append(origin) | ||||
return random.choice(origins) | return origins | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def origin_with_releases(tests_data): | def origin_with_multiple_visits(): | ||||
"""Fixture returning a random origin with releases ingested into the test archive. | """Fixture returning a random origin with multiple visits ingested | ||||
into the test archive. | |||||
""" | """ | ||||
return random.choice(_origin_with_multiple_visits()) | |||||
@functools.lru_cache(maxsize=None) | |||||
def _origin_with_releases(): | |||||
tests_data = get_tests_data() | |||||
origins = [] | origins = [] | ||||
for origin in tests_data["origins"]: | for origin in tests_data["origins"]: | ||||
snapshot = snapshot_get_latest(tests_data["storage"], origin["url"]) | snapshot = snapshot_get_latest(tests_data["storage"], origin["url"]) | ||||
if any([b.target_type.value == "release" for b in snapshot.branches.values()]): | if any([b.target_type.value == "release" for b in snapshot.branches.values()]): | ||||
origins.append(origin) | origins.append(origin) | ||||
return random.choice(origins) | return origins | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def origin_with_pull_request_branches(tests_data): | def origin_with_releases(): | ||||
"""Fixture returning a random origin with pull request branches ingested | """Fixture returning a random origin with releases ingested into the test archive. | ||||
into the test archive. | |||||
""" | """ | ||||
return random.choice(_origin_with_releases()) | |||||
@functools.lru_cache(maxsize=None) | |||||
def _origin_with_pull_request_branches(): | |||||
tests_data = get_tests_data() | |||||
origins = [] | origins = [] | ||||
storage = tests_data["storage"] | storage = tests_data["storage"] | ||||
for origin in storage.origin_list(limit=1000).results: | for origin in storage.origin_list(limit=1000).results: | ||||
snapshot = snapshot_get_latest(storage, origin.url) | snapshot = snapshot_get_latest(storage, origin.url) | ||||
if any([b"refs/pull/" in b for b in snapshot.branches]): | if any([b"refs/pull/" in b for b in snapshot.branches]): | ||||
origins.append(origin) | origins.append(origin) | ||||
return random.choice(origins) | return origins | ||||
@pytest.fixture(scope="function") | |||||
def origin_with_pull_request_branches(): | |||||
"""Fixture returning a random origin with pull request branches ingested | |||||
into the test archive. | |||||
""" | |||||
return random.choice(_origin_with_pull_request_branches()) | |||||
def _object_type_swhid(tests_data, object_type): | @functools.lru_cache(maxsize=None) | ||||
return random.choice( | def _object_type_swhid(object_type): | ||||
list( | return list( | ||||
filter( | filter( | ||||
lambda swhid: swhid.object_type == object_type, | lambda swhid: swhid.object_type == object_type, | ||||
_known_swh_objects(tests_data, "swhids"), | _known_swh_objects(get_tests_data(), "swhids"), | ||||
) | |||||
) | ) | ||||
) | ) | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def content_swhid(tests_data): | def content_swhid(): | ||||
"""Fixture returning a qualified SWHID for a random content object | """Fixture returning a qualified SWHID for a random content object | ||||
ingested into the test archive. | ingested into the test archive. | ||||
""" | """ | ||||
return _object_type_swhid(tests_data, ObjectType.CONTENT) | return random.choice(_object_type_swhid(ObjectType.CONTENT)) | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def directory_swhid(tests_data): | def directory_swhid(): | ||||
"""Fixture returning a qualified SWHID for a random directory object | """Fixture returning a qualified SWHID for a random directory object | ||||
ingested into the test archive. | ingested into the test archive. | ||||
""" | """ | ||||
return _object_type_swhid(tests_data, ObjectType.DIRECTORY) | return random.choice(_object_type_swhid(ObjectType.DIRECTORY)) | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def release_swhid(tests_data): | def release_swhid(): | ||||
"""Fixture returning a qualified SWHID for a random release object | """Fixture returning a qualified SWHID for a random release object | ||||
ingested into the test archive. | ingested into the test archive. | ||||
""" | """ | ||||
return _object_type_swhid(tests_data, ObjectType.RELEASE) | return random.choice(_object_type_swhid(ObjectType.RELEASE)) | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def revision_swhid(tests_data): | def revision_swhid(): | ||||
"""Fixture returning a qualified SWHID for a random revision object | """Fixture returning a qualified SWHID for a random revision object | ||||
ingested into the test archive. | ingested into the test archive. | ||||
""" | """ | ||||
return _object_type_swhid(tests_data, ObjectType.REVISION) | return random.choice(_object_type_swhid(ObjectType.REVISION)) | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def snapshot_swhid(tests_data): | def snapshot_swhid(): | ||||
"""Fixture returning a qualified SWHID for a snapshot object | """Fixture returning a qualified SWHID for a snapshot object | ||||
ingested into the test archive. | ingested into the test archive. | ||||
""" | """ | ||||
return _object_type_swhid(tests_data, ObjectType.SNAPSHOT) | return random.choice(_object_type_swhid(ObjectType.SNAPSHOT)) | ||||
# Fixture to manipulate data from a sample archive used in the tests | # Fixture to manipulate data from a sample archive used in the tests | ||||
@pytest.fixture(scope="function") | @pytest.fixture(scope="function") | ||||
def archive_data(tests_data): | def archive_data(tests_data): | ||||
return _ArchiveData(tests_data) | return _ArchiveData(tests_data) | ||||
▲ Show 20 Lines • Show All 370 Lines • Show Last 20 Lines |