Changeset View
Standalone View
swh/web/tests/strategies.py
- This file was added.
# Copyright (C) 2018 The Software Heritage developers | |||||
# See the AUTHORS file at the top-level directory of this distribution | |||||
# License: GNU Affero General Public License version 3, or any later version | |||||
# See top-level LICENSE file for more information | |||||
import random | |||||
from collections import defaultdict | |||||
from datetime import datetime | |||||
from hypothesis import settings | |||||
from hypothesis.strategies import ( | |||||
just, sampled_from, lists, composite, datetimes | |||||
) | |||||
from string import ascii_letters, hexdigits | |||||
from swh.model.hashutil import hash_to_hex | |||||
from swh.storage.algos.revisions_walker import get_revisions_walker | |||||
from swh.storage.tests.algos.test_snapshot import origins | |||||
from swh.web.tests.data import get_tests_data | |||||
# Module dedicated to the generation of input data for tests through
# the use of hypothesis.
# Some of this data is sampled from a test archive created and populated
# in the swh.web.tests.data module.
# Register and activate a dedicated hypothesis profile for these tests:
# no per-example deadline and a single generated example per test.
settings.register_profile(
    "swh-web",
    settings(deadline=None, max_examples=1),
)
settings.load_profile("swh-web")
# The following strategies exploit the hypothesis capabilities | |||||
def _known_swh_object(object_type):
    """
    Build a strategy sampling one element among the test data
    registered under the ``object_type`` key (e.g. 'contents',
    'directories', 'releases', 'revisions', 'snapshots').
    """
    known_objects = get_tests_data()[object_type]
    return sampled_from(known_objects)
def _unknown_swh_object(draw, object_type):
    """
    Draw sha1 values until one is found that is absent from the test
    data registered under the ``object_type`` key, and return it.

    ``draw`` is the drawing function handed over by a composite strategy.
    """
    known_objects = get_tests_data()[object_type]
    candidate = draw(sha1())
    # retry as long as the drawn identifier collides with a known object
    while candidate in known_objects:
        candidate = draw(sha1())
    return candidate
def sha1():
    """
    Hypothesis strategy returning a valid hexadecimal sha1 value.

    The 40 characters are picked with the ``random`` module when this
    function is called; the resulting strategy always yields that
    single lowercase value.
    """
    digits = [random.choice(hexdigits) for _ in range(40)]
    hex_value = ''.join(digits).lower()
    return just(hex_value)
def invalid_sha1():
    """
    Hypothesis strategy returning an invalid sha1 representation.

    The value is a lowercase string of 50 ASCII letters (a sha1 is
    40 hexadecimal characters long), picked when this function is called.
    """
    letters = [random.choice(ascii_letters) for _ in range(50)]
    bogus_value = ''.join(letters).lower()
    return just(bogus_value)
def sha256():
    """
    Hypothesis strategy returning a valid hexadecimal sha256 value.

    The 64 characters are picked with the ``random`` module when this
    function is called; the resulting strategy always yields that
    single lowercase value.
    """
    digits = [random.choice(hexdigits) for _ in range(64)]
    hex_value = ''.join(digits).lower()
    return just(hex_value)
Not Done Inline ActionsIt looks like these three strategies generate a single value. Shouldn't we define an auxiliary function and use it with [hypothesis.strategies.builds](https://hypothesis.readthedocs.io/en/latest/data.html#hypothesis.strategies.builds)? Or maybe make them composite strategies that take bytes as argument and build the hashes from there? vlorentz: It looks like these three strategies generate a single value. Shouldn't we define an auxiliary… | |||||
Not Done Inline ActionsAn example of what you proposed would be great ;) ardumont: An example of what you proposed would be great ;) | |||||
Done Inline ActionsIn order for the tests to not take too long to execute, I have limited the maximum number of examples generated by hypothesis to 1. So that's why I implemented this way. anlambert: In order for the tests to not take too long to execute, I have limited the maximum number of… | |||||
def content():
    """
    Hypothesis strategy sampling one content among those ingested
    into the test archive.
    """
    content_strategy = _known_swh_object('contents')
    return content_strategy
def contents():
    """
    Hypothesis strategy returning a list of 2 to 8 contents sampled
    among those ingested into the test archive.
    """
    single_content = content()
    return lists(single_content, min_size=2, max_size=8)
@composite
def unknown_content(draw):
    """
    Hypothesis strategy returning a dict of content hashes
    ('blake2s256', 'sha1', 'sha1_git', 'sha256') that is not part of
    the contents registered in the test data.
    """
    known_contents = get_tests_data()['contents']
    while True:
        # hashes are drawn in the same order as the keys are listed below
        blake2s256_hex = draw(sha256())
        sha1_hex = draw(sha1())
        sha1_git_hex = draw(sha1())
        sha256_hex = draw(sha256())
        candidate = {
            'blake2s256': blake2s256_hex,
            'sha1': sha1_hex,
            'sha1_git': sha1_git_hex,
            'sha256': sha256_hex
        }
        if candidate in known_contents:
            # collision with a known content: draw a new set of hashes
            continue
        return candidate
def unknown_contents():
    """
    Hypothesis strategy returning a list of 2 to 8 contents that are
    not ingested into the test archive.
    """
    single_unknown_content = unknown_content()
    return lists(single_unknown_content, min_size=2, max_size=8)
Not Done Inline ActionsApparently, composite does some magic here (draw is not marked as optional in the main unknown_content definition). ardumont: Apparently, composite does some magic here (draw is not marked as optional in the main… | |||||
Done Inline ActionsQuoting hypothesis doc [1]:
[1] https://hypothesis.readthedocs.io/en/latest/data.html#composite-strategies anlambert: Quoting hypothesis doc [1]:
> The decorated function has the initial argument removed from the… | |||||
def directory():
    """
    Hypothesis strategy sampling one directory among those ingested
    into the test archive.
    """
    directory_strategy = _known_swh_object('directories')
    return directory_strategy
@composite
def unknown_directory(draw):
    """
    Hypothesis strategy returning a sha1 value that does not match
    any directory ingested into the test archive.
    """
    unknown_id = _unknown_swh_object(draw, 'directories')
    return unknown_id
def origin():
    """
    Hypothesis strategy returning a random origin, obtained from the
    ``origins`` strategy of swh.storage.tests.algos.test_snapshot.

    NOTE(review): the previous docstring stated the origin is not
    ingested into the test archive; confirm against ``origins``.
    """
    origin_strategy = origins()
    return origin_strategy
def visit_dates():
    """
    Hypothesis strategy returning a list of 2 to 8 distinct visit
    dates, bounded by 2015-01-01 00:00 and 2018-12-31 00:00.
    """
    earliest = datetime(2015, 1, 1, 0, 0)
    latest = datetime(2018, 12, 31, 0, 0)
    date_strategy = datetimes(min_value=earliest, max_value=latest)
    return lists(date_strategy, min_size=2, max_size=8, unique=True)
def release():
    """
    Hypothesis strategy sampling one release among those ingested
    into the test archive.
    """
    release_strategy = _known_swh_object('releases')
    return release_strategy
@composite
def unknown_release(draw):
    """
    Hypothesis strategy returning a sha1 value that does not match
    any release ingested into the test archive.
    """
    unknown_id = _unknown_swh_object(draw, 'releases')
    return unknown_id
Not Done Inline Actionsswh ardumont: `swh` | |||||
def revision():
    """
    Hypothesis strategy sampling one revision among those ingested
    into the test archive.
    """
    revision_strategy = _known_swh_object('revisions')
    return revision_strategy
Not Done Inline ActionsRename to something like content_sha1s_with_ctags? vlorentz: Rename to something like `content_sha1s_with_ctags`? | |||||
Done Inline Actionsack anlambert: ack | |||||
@composite
def unknown_revision(draw):
    """
    Hypothesis strategy returning a sha1 value that does not match
    any revision ingested into the test archive.
    """
    unknown_id = _unknown_swh_object(draw, 'revisions')
    return unknown_id
def snapshot():
    """
    Hypothesis strategy sampling one snapshot among those ingested
    into the test archive.
    """
    snapshot_strategy = _known_swh_object('snapshots')
    return snapshot_strategy
@composite
def unknown_snapshot(draw):
    """
    Hypothesis strategy returning a sha1 value that does not match
    any snapshot ingested into the test archive.
    """
    unknown_id = _unknown_swh_object(draw, 'snapshots')
    return unknown_id
def _get_origin_dfs_revisions_walker():
    """
    Return a 'dfs' revisions walker starting from the HEAD branch target
    of the latest snapshot of a randomly chosen origin of the test data.
    """
    archive_storage = get_tests_data()['storage']
    known_origins = get_tests_data()['origins']
    picked_origin = random.choice(known_origins)
    latest_snapshot = archive_storage.snapshot_get_latest(
        picked_origin['id'])
    head_branch = latest_snapshot['branches'][b'HEAD']
    return get_revisions_walker('dfs', archive_storage,
                                head_branch['target'])
def ancestor_revisions():
    """
    Hypothesis strategy returning a pair of revisions ingested into the
    test archive with an ancestor relation.

    The generated value is a dict with the following keys:

        * 'sha1_git_root': hex id of the first walked revision (the head)
        * 'sha1_git': hex id of a revision picked on the path walked
          from the head down to the first parentless revision
        * 'children': hex ids of the walked revisions having the picked
          revision as parent

    At least three revisions must be collected on that path, otherwise
    ``random.choice`` raises IndexError.
    """
    # walk, depth first, the history of one of the origins
    # loaded into the test archive
    walker = _get_origin_dfs_revisions_walker()

    mainline = []
    children_of = defaultdict(list)
    initial_rev_reached = False

    for walked_rev in walker:
        parent_ids = walked_rev['parents']
        # record the parent -> children relation for every walked revision
        for parent_id in parent_ids:
            children_of[parent_id].append(walked_rev['id'])
        # collect revisions up to (and including) the first parentless one
        if not initial_rev_reached:
            mainline.append(walked_rev)
            initial_rev_reached = not parent_ids

    # the first walked revision is the head one
    head_rev = mainline[0]

    # indices 1 .. len - 2: neither the head revision nor the last
    # collected one can be picked
    picked_idx = random.choice(range(1, len(mainline) - 1))
    picked_rev = mainline[picked_idx]
    picked_children = children_of[picked_rev['id']]

    return just({
        'sha1_git_root': hash_to_hex(head_rev['id']),
        'sha1_git': hash_to_hex(picked_rev['id']),
        'children': [hash_to_hex(child) for child in picked_children]
    })
def non_ancestor_revisions():
    """
    Hypothesis strategy returning a pair of revisions ingested into the
    test archive with no ancestor relation.

    The pair is made of the first two parents of a randomly picked merge
    revision whose parents each have a single child among the walked
    revisions. The generated value is a dict with the following keys:

        * 'sha1_git_root': hex id of the first parent
        * 'sha1_git': hex id of the second parent

    Raises:
        ValueError: if no merge revision fulfilling the above condition
            is found in the walked history
    """
    # get a dfs revisions walker for one of the origins
    # loaded into the test archive
    revisions_walker = _get_origin_dfs_revisions_walker()
    merge_revs = []
    children = defaultdict(list)
    # get all merge revisions and record the parent -> children relation
    for rev in revisions_walker:
        if len(rev['parents']) > 1:
            merge_revs.append(rev)
        for rev_p in rev['parents']:
            children[rev_p].append(rev['id'])
    # find a merge revision whose parents have a unique child revision,
    # candidates being examined in random order
    random.shuffle(merge_revs)
    for merge_rev in merge_revs:
        if all(len(children[rev_p]) == 1
               for rev_p in merge_rev['parents']):
            selected_revs = merge_rev['parents']
            # the candidates are already shuffled, keep the first match
            break
    else:
        # fail explicitly instead of subscripting None below
        raise ValueError('No merge revision whose parents have a unique '
                         'child revision found in the test archive')
    return just({
        'sha1_git_root': hash_to_hex(selected_revs[0]),
        'sha1_git': hash_to_hex(selected_revs[1])
    })
# The following strategies return data specific to some tests
# that cannot be generated and are thus hardcoded.
def contents_with_ctags():
    """
    Hypothesis strategy returning a hardcoded set of contents ingested
    into the test archive. Those contents are ctags compatible, that is
    running ctags on them yields results.

    The generated value is a dict holding the contents sha1 values
    ('sha1s') and a symbol name ('symbol_name').
    """
    ctags_sha1s = [
        '0ab37c02043ebff946c1937523f60aadd0844351',
        '15554cf7608dde6bfefac7e3d525596343a85b6f',
        '2ce837f1489bdfb8faf3ebcc7e72421b5bea83bd',
        '30acd0b47fc25e159e27a980102ddb1c4bea0b95',
        '4f81f05aaea3efb981f9d90144f746d6b682285b',
        '5153aa4b6e4455a62525bc4de38ed0ff6e7dd682',
        '59d08bafa6a749110dfb65ba43a61963d5a5bf9f',
        '7568285b2d7f31ae483ae71617bd3db873deaa2c',
        '7ed3ee8e94ac52ba983dd7690bdc9ab7618247b4',
        '8ed7ef2e7ff9ed845e10259d08e4145f1b3b5b03',
        '9b3557f1ab4111c8607a4f2ea3c1e53c6992916c',
        '9c20da07ed14dc4fcd3ca2b055af99b2598d8bdd',
        'c20ceebd6ec6f7a19b5c3aebc512a12fbdc9234b',
        'e89e55a12def4cd54d5bff58378a3b5119878eb7',
        'e8c0654fe2d75ecd7e0b01bee8a8fc60a130097e',
        'eb6595e559a1d34a2b41e8d4835e0e4f98a5d2b5',
    ]
    return just({'sha1s': ctags_sha1s, 'symbol_name': 'ABS'})
swh