# Copyright (C) 2018  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

import random

from collections import defaultdict
from datetime import datetime

from hypothesis import settings, assume
from hypothesis.strategies import (
    just, sampled_from, lists, composite, datetimes,
    integers, binary
)

from swh.model.hashutil import hash_to_hex, hash_to_bytes
from swh.model.identifiers import directory_identifier
from swh.storage.algos.revisions_walker import get_revisions_walker
from swh.storage.tests.algos.test_snapshot import ( # noqa
    origins as new_origin_strategy, snapshots as new_snapshot
)
from swh.web.tests.data import get_tests_data

# Module dedicated to the generation of input data for tests through
# the use of hypothesis.
# Some of these data are sampled from a test archive created and populated
# in the swh.web.tests.data module.

# The tests data and the associated storage are fetched once, at import
# time, and shared by every strategy defined below.
tests_data = get_tests_data()
storage = tests_data['storage']

# Set some hypothesis settings
settings.register_profile("swh-web", settings(deadline=None))
settings.load_profile("swh-web")


# The following strategies exploit the hypothesis capabilities


def _known_swh_object(object_type):
    """
    Return a strategy sampling one of the objects of the given type
    (a key of the tests data, e.g. 'contents') found in the tests data.
    """
    return sampled_from(tests_data[object_type])


def _random_hex_digest(nb_bytes):
    """
    Return a strategy generating the hexadecimal representation of
    a random byte string of exactly nb_bytes bytes.

    The byte string made only of zeros is rejected by the filter
    (its integer value is 0, hence falsy).
    """
    return binary(
        min_size=nb_bytes, max_size=nb_bytes).filter(
            lambda s: int.from_bytes(s, byteorder='little')).map(hash_to_hex)


def sha1():
    """
    Hypothesis strategy returning a valid hexadecimal sha1 value.
    """
    # 20 bytes <=> 40 hexadecimal digits
    return _random_hex_digest(20)


def invalid_sha1():
    """
    Hypothesis strategy returning an invalid sha1 representation.
    """
    # 50 bytes <=> 100 hexadecimal digits, too long to be a sha1
    return _random_hex_digest(50)


def sha256():
    """
    Hypothesis strategy returning a valid hexadecimal sha256 value.
    """
    # 32 bytes <=> 64 hexadecimal digits
    return _random_hex_digest(32)


def content():
    """
    Hypothesis strategy returning a random content ingested
    into the test archive.
    """
    return _known_swh_object('contents')


def contents():
    """
    Hypothesis strategy returning random contents ingested
    into the test archive.
    """
    return lists(content(), min_size=2, max_size=8)


@composite
def new_content(draw):
    """
    Hypothesis strategy returning a dict of randomly generated
    hexadecimal checksums describing a content.

    The example is discarded when the two 20 bytes checksums or the
    two 32 bytes checksums happen to be equal.
    """
    blake2s256_hex = draw(sha256())
    sha1_hex = draw(sha1())
    sha1_git_hex = draw(sha1())
    sha256_hex = draw(sha256())

    assume(sha1_hex != sha1_git_hex)
    assume(blake2s256_hex != sha256_hex)

    return {
        # NOTE(review): this key is spelled with an uppercase 'S' while
        # the previous implementation used 'blake2s256' -- looks like
        # a typo, confirm against the callers before renaming it
        'blake2S256': blake2s256_hex,
        'sha1': sha1_hex,
        'sha1_git': sha1_git_hex,
        'sha256': sha256_hex
    }


def unknown_content():
    """
    Hypothesis strategy returning a random content not ingested
    into the test archive.
    """
    # only keep generated contents whose sha1 is not found in the storage
    return new_content().filter(
        lambda c: next(storage.content_get(
            [hash_to_bytes(c['sha1'])])) is None)


def unknown_contents():
    """
    Hypothesis strategy returning random contents not ingested
    into the test archive.
    """
    return lists(unknown_content(), min_size=2, max_size=8)


def directory():
    """
    Hypothesis strategy returning a random directory ingested
    into the test archive.
    """
    return _known_swh_object('directories')


def empty_directory():
    """
    Hypothesis strategy returning the empty directory ingested
    into the test archive.
    """
    return just(directory_identifier({'entries': []}))


def unknown_directory():
    """
    Hypothesis strategy returning a random directory not ingested
    into the test archive.
    """
    # only keep identifiers reported as missing by the storage
    return sha1().filter(
        lambda s: len(list(
            storage.directory_missing([hash_to_bytes(s)]))) > 0)


def origin():
    """
    Hypothesis strategy returning a random origin ingested
    into the test archive.
    """
    return _known_swh_object('origins')


def new_origin():
    """
    Hypothesis strategy returning a random origin not ingested
    into the test archive.
    """
    return new_origin_strategy().filter(
        lambda origin: storage.origin_get(origin) is None)


def visit_dates(nb_dates=None):
    """
    Hypothesis strategy returning a list of visit dates.

    The list is sorted, contains unique dates between 2015-01-01 and
    2018-12-31 and has exactly nb_dates elements when that parameter
    is provided (between 2 and 8 elements otherwise).
    """
    min_size = nb_dates if nb_dates else 2
    max_size = nb_dates if nb_dates else 8
    return lists(datetimes(min_value=datetime(2015, 1, 1, 0, 0),
                           max_value=datetime(2018, 12, 31, 0, 0)),
                 min_size=min_size, max_size=max_size, unique=True).map(sorted)


def release():
    """
    Hypothesis strategy returning a random release ingested
    into the test archive.
    """
    return _known_swh_object('releases')


def unknown_release():
    """
    Hypothesis strategy returning a random release not ingested
    into the test archive.
    """
    # the generated hexadecimal identifier is converted to bytes before
    # querying the storage, as done by the other unknown_* strategies
    return sha1().filter(
        lambda s: next(storage.release_get([hash_to_bytes(s)])) is None)


def revision():
    """
    Hypothesis strategy returning a random revision ingested
    into the test archive.
    """
    return _known_swh_object('revisions')


def unknown_revision():
    """
    Hypothesis strategy returning a random revision not ingested
    into the test archive.
    """
    return sha1().filter(
        lambda s: next(storage.revision_get([hash_to_bytes(s)])) is None)


def revisions():
    """
    Hypothesis strategy returning random revisions ingested
    into the test archive.
    """
    return lists(revision(), min_size=2, max_size=8)


def unknown_revisions():
    """
    Hypothesis strategy returning random revisions not ingested
    into the test archive.
    """
    return lists(unknown_revision(), min_size=2, max_size=8)


def snapshot():
    """
    Hypothesis strategy returning a random snapshot ingested
    into the test archive.
    """
    return _known_swh_object('snapshots')


def new_snapshots(nb_snapshots=None):
    """
    Hypothesis strategy returning a list of newly generated snapshots.

    The list has exactly nb_snapshots elements when that parameter
    is provided (between 2 and 8 elements otherwise).
    """
    min_size = nb_snapshots if nb_snapshots else 2
    max_size = nb_snapshots if nb_snapshots else 8
    return lists(new_snapshot(min_size=2, max_size=10, only_objects=True),
                 min_size=min_size, max_size=max_size)


def unknown_snapshot():
    """
    Hypothesis strategy returning a random snapshot not ingested
    into the test archive.
    """
    return sha1().filter(
        lambda s: storage.snapshot_get(hash_to_bytes(s)) is None)


def person():
    """
    Hypothesis strategy returning a random person ingested
    into the test archive.
    """
    return _known_swh_object('persons')


def unknown_person():
    """
    Hypothesis strategy returning a random person not ingested
    into the test archive.
    """
    persons = tests_data['persons']
    # any integer greater than the highest known person value is unknown
    return integers(min_value=max(persons)+1)


def _get_origin_dfs_revisions_walker():
    """
    Return a 'dfs' revisions walker starting from the HEAD branch target
    of the latest snapshot of a randomly picked origin (the last origin
    of the tests data is never picked).
    """
    origin = random.choice(tests_data['origins'][:-1])
    snapshot = storage.snapshot_get_latest(origin['id'])
    head = snapshot['branches'][b'HEAD']['target']
    return get_revisions_walker('dfs', storage, head)


def ancestor_revisions():
    """
    Hypothesis strategy returning a pair of revisions ingested into the
    test archive with an ancestor relation.
    """
    # get a dfs revisions walker for one of the origins
    # loaded into the test archive
    revisions_walker = _get_origin_dfs_revisions_walker()
    master_revisions = []
    children = defaultdict(list)
    init_rev_found = False
    # get revisions only authored in the master branch
    for rev in revisions_walker:
        for rev_p in rev['parents']:
            children[rev_p].append(rev['id'])
        if not init_rev_found:
            master_revisions.append(rev)
        if not rev['parents']:
            init_rev_found = True

    # head revision
    root_rev = master_revisions[0]
    # pick a random revision, different from head, only authored
    # in the master branch
    ancestor_rev_idx = random.choice(list(range(1, len(master_revisions)-1)))
    ancestor_rev = master_revisions[ancestor_rev_idx]
    ancestor_child_revs = children[ancestor_rev['id']]

    return just({
        'sha1_git_root': hash_to_hex(root_rev['id']),
        'sha1_git': hash_to_hex(ancestor_rev['id']),
        'children': [hash_to_hex(r) for r in ancestor_child_revs]
    })


def non_ancestor_revisions():
    """
    Hypothesis strategy returning a pair of revisions ingested into the
    test archive with no ancestor relation.
    """
    # get a dfs revisions walker for one of the origins
    # loaded into the test archive
    revisions_walker = _get_origin_dfs_revisions_walker()
    merge_revs = []
    children = defaultdict(list)
    # get all merge revisions
    for rev in revisions_walker:
        if len(rev['parents']) > 1:
            merge_revs.append(rev)
        for rev_p in rev['parents']:
            children[rev_p].append(rev['id'])
    # find a merge revision whose parents have a unique child revision
    random.shuffle(merge_revs)
    selected_revs = None
    for merge_rev in merge_revs:
        if all(len(children[rev_p]) == 1
               for rev_p in merge_rev['parents']):
            selected_revs = merge_rev['parents']

    # NOTE(review): selected_revs is still None here when no suitable
    # merge revision has been found, the indexing below then fails
    # with a TypeError
    return just({
        'sha1_git_root': hash_to_hex(selected_revs[0]),
        'sha1_git': hash_to_hex(selected_revs[1])
    })


# The following strategies return data specific to some tests
# that can not be generated and thus are hardcoded.


def contents_with_ctags():
    """
    Hypothesis strategy returning contents ingested into the test
    archive. Those contents are ctags compatible, that is running
    ctags on those yields results.
    """
    return just({
        'sha1s': ['0ab37c02043ebff946c1937523f60aadd0844351',
                  '15554cf7608dde6bfefac7e3d525596343a85b6f',
                  '2ce837f1489bdfb8faf3ebcc7e72421b5bea83bd',
                  '30acd0b47fc25e159e27a980102ddb1c4bea0b95',
                  '4f81f05aaea3efb981f9d90144f746d6b682285b',
                  '5153aa4b6e4455a62525bc4de38ed0ff6e7dd682',
                  '59d08bafa6a749110dfb65ba43a61963d5a5bf9f',
                  '7568285b2d7f31ae483ae71617bd3db873deaa2c',
                  '7ed3ee8e94ac52ba983dd7690bdc9ab7618247b4',
                  '8ed7ef2e7ff9ed845e10259d08e4145f1b3b5b03',
                  '9b3557f1ab4111c8607a4f2ea3c1e53c6992916c',
                  '9c20da07ed14dc4fcd3ca2b055af99b2598d8bdd',
                  'c20ceebd6ec6f7a19b5c3aebc512a12fbdc9234b',
                  'e89e55a12def4cd54d5bff58378a3b5119878eb7',
                  'e8c0654fe2d75ecd7e0b01bee8a8fc60a130097e',
                  'eb6595e559a1d34a2b41e8d4835e0e4f98a5d2b5'],
        'symbol_name': 'ABS'
    })


def revision_with_submodules():
    """
    Hypothesis strategy returning a revision that is known to
    point to a directory with revision entries (aka git submodule)
    """
    return just({
        'rev_sha1_git': 'ffcb69001f3f6745dfd5b48f72ab6addb560e234',
        'rev_dir_sha1_git': 'd92a21446387fa28410e5a74379c934298f39ae2',
        'rev_dir_rev_path': 'libtess2'
    })