Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9341227
strategies.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
15 KB
Subscribers
None
strategies.py
View Options
# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import
random
from
collections
import
defaultdict
from
datetime
import
datetime
from
hypothesis
import
settings
,
assume
from
hypothesis.strategies
import
(
just
,
sampled_from
,
lists
,
composite
,
datetimes
,
integers
,
binary
,
text
,
characters
)
from
swh.model.hashutil
import
hash_to_hex
,
hash_to_bytes
from
swh.model.identifiers
import
directory_identifier
from
swh.storage.algos.revisions_walker
import
get_revisions_walker
from
swh.model.hypothesis_strategies
import
(
origins
as
new_origin_strategy
,
snapshots
as
new_snapshot
)
from
swh.web.tests.data
import
get_tests_data
# Module dedicated to the generation of input data for tests through
# the use of hypothesis.
# Some of these data are sampled from a test archive created and populated
# in the swh.web.tests.data module.
# Set the swh-web hypothesis profile if none has been explicitly set
# Keep a handle on the settings registered under the 'default' hypothesis
# profile so they can be compared with the settings currently in effect.
hypothesis_default_settings = settings.get_profile('default')
# The comparison is done on the repr() of both settings objects: when they
# match, no other profile is considered to have been loaded and the
# 'swh-web' profile gets activated.
# NOTE(review): the 'swh-web' profile is not registered in this module; it
# is presumably registered elsewhere before this import -- confirm.
if repr(settings()) == repr(hypothesis_default_settings):
    settings.load_profile('swh-web')
# The following strategies exploit the hypothesis capabilities
def _filter_checksum(cs):
    """
    Predicate telling whether a generated checksum can be used.

    A checksum is rejected when its bytes decode to the integer zero
    (e.g. only null bytes) or when it is already present in the
    'generated_checksums' entry of the tests data. An accepted checksum
    is added to that entry before returning, so it will be rejected
    the next time it is generated.

    Args:
        cs (bytes): the checksum to check

    Returns:
        bool: True if the checksum is accepted, False otherwise
    """
    seen_checksums = get_tests_data()['generated_checksums']
    is_null = int.from_bytes(cs, byteorder='little') == 0
    if is_null or cs in seen_checksums:
        return False
    seen_checksums.add(cs)
    return True
def _known_swh_object(object_type):
    """
    Build a strategy sampling among the objects stored under the
    ``object_type`` key of the tests data.
    """
    known_objects = get_tests_data()[object_type]
    return sampled_from(known_objects)
def sha1():
    """
    Hypothesis strategy returning a valid hexadecimal sha1 value.

    Values are built from 20 random bytes accepted by
    ``_filter_checksum`` then converted with ``hash_to_hex``.
    """
    raw_checksums = binary(min_size=20, max_size=20)
    accepted_checksums = raw_checksums.filter(_filter_checksum)
    return accepted_checksums.map(hash_to_hex)
def invalid_sha1():
    """
    Hypothesis strategy returning an invalid sha1 representation.

    Values are built from 50 random bytes (instead of the 20 bytes
    used by the ``sha1`` strategy) accepted by ``_filter_checksum``
    then converted with ``hash_to_hex``.
    """
    raw_checksums = binary(min_size=50, max_size=50)
    accepted_checksums = raw_checksums.filter(_filter_checksum)
    return accepted_checksums.map(hash_to_hex)
def sha256():
    """
    Hypothesis strategy returning a valid hexadecimal sha256 value.

    Values are built from 32 random bytes accepted by
    ``_filter_checksum`` then converted with ``hash_to_hex``.
    """
    raw_checksums = binary(min_size=32, max_size=32)
    accepted_checksums = raw_checksums.filter(_filter_checksum)
    return accepted_checksums.map(hash_to_hex)
def content():
    """
    Hypothesis strategy returning a random content ingested
    into the test archive (sampled from the 'contents' tests data).
    """
    known_contents = _known_swh_object('contents')
    return known_contents
def contents():
    """
    Hypothesis strategy returning a list of 2 to 8 random contents
    ingested into the test archive.
    """
    size_bounds = {'min_size': 2, 'max_size': 8}
    return lists(content(), **size_bounds)
def content_text():
    """
    Hypothesis strategy returning random textual contents ingested
    into the test archive, i.e. those whose mimetype starts
    with 'text/'.
    """
    def _is_text(cnt):
        return cnt['mimetype'].startswith('text/')

    return content().filter(_is_text)
def content_text_non_utf8():
    """
    Hypothesis strategy returning random textual contents not encoded
    to UTF-8 ingested into the test archive.

    A content is kept when its mimetype starts with 'text/' and its
    encoding is neither 'utf-8' nor 'us-ascii'.
    """
    def _is_non_utf8_text(cnt):
        if not cnt['mimetype'].startswith('text/'):
            return False
        return cnt['encoding'] not in ('utf-8', 'us-ascii')

    return content().filter(_is_non_utf8_text)
def content_text_no_highlight():
    """
    Hypothesis strategy returning random textual contents with no detected
    programming language to highlight ingested into the test archive.

    A content is kept when its mimetype starts with 'text/' and its
    'hljs_language' value is 'nohighlight'.
    """
    def _is_text_without_highlight(cnt):
        if not cnt['mimetype'].startswith('text/'):
            return False
        return cnt['hljs_language'] == 'nohighlight'

    return content().filter(_is_text_without_highlight)
def content_image_type():
    """
    Hypothesis strategy returning random image contents ingested
    into the test archive, i.e. those whose mimetype starts
    with 'image/'.
    """
    def _is_image(cnt):
        return cnt['mimetype'].startswith('image/')

    return content().filter(_is_image)
@composite
def new_content(draw):
    """
    Hypothesis strategy returning the checksums of a random content
    as a dict with keys 'blake2S256', 'sha1', 'sha1_git' and 'sha256'.

    The two 20 bytes checksums are required to differ from each other,
    and so are the two 32 bytes ones.
    """
    # NOTE(review): the 'blake2S256' key has an uppercase 'S', it may be
    # a typo for 'blake2s256' -- confirm with the tests consuming it.
    hashes = {}
    # values are drawn in the order the keys are inserted
    hashes['blake2S256'] = draw(sha256())
    hashes['sha1'] = draw(sha1())
    hashes['sha1_git'] = draw(sha1())
    hashes['sha256'] = draw(sha256())
    assume(hashes['sha1'] != hashes['sha1_git'])
    assume(hashes['blake2S256'] != hashes['sha256'])
    return hashes
def unknown_content():
    """
    Hypothesis strategy returning a random content not ingested
    into the test archive.

    Generated contents are kept only when looking up their sha1
    in the tests data storage yields None.
    """
    def _is_missing(cnt):
        storage = get_tests_data()['storage']
        found = storage.content_get([hash_to_bytes(cnt['sha1'])])
        return next(found) is None

    return new_content().filter(_is_missing)
def unknown_contents():
    """
    Hypothesis strategy returning a list of 2 to 8 random contents
    not ingested into the test archive.
    """
    size_bounds = {'min_size': 2, 'max_size': 8}
    return lists(unknown_content(), **size_bounds)
def directory():
    """
    Hypothesis strategy returning a random directory ingested
    into the test archive (sampled from the 'directories' tests data).
    """
    known_directories = _known_swh_object('directories')
    return known_directories
def directory_with_subdirs():
    """
    Hypothesis strategy returning a random directory containing
    sub directories ingested into the test archive.

    A directory is kept when at least one of the entries listed by the
    tests data storage for it has the 'dir' type.
    """
    storage = get_tests_data()['storage']

    def _has_subdir(dir_id):
        entries = storage.directory_ls(hash_to_bytes(dir_id))
        # a generator expression lets any() stop at the first sub
        # directory found instead of materializing every entry twice
        return any(entry['type'] == 'dir' for entry in entries)

    return directory().filter(_has_subdir)
def empty_directory():
    """
    Hypothesis strategy returning the empty directory ingested
    into the test archive.

    The returned value is the identifier computed by
    ``directory_identifier`` for a directory without entries.
    """
    empty_dir_id = directory_identifier({'entries': []})
    return just(empty_dir_id)
def unknown_directory():
    """
    Hypothesis strategy returning a random directory not ingested
    into the test archive.

    Generated sha1 values are kept only when the tests data storage
    reports them as missing directories.
    """
    storage = get_tests_data()['storage']

    def _is_missing(dir_id):
        missing = list(storage.directory_missing([hash_to_bytes(dir_id)]))
        return len(missing) > 0

    return sha1().filter(_is_missing)
def origin():
    """
    Hypothesis strategy returning a random origin ingested
    into the test archive (sampled from the 'origins' tests data).
    """
    known_origins = _known_swh_object('origins')
    return known_origins
def origin_with_multiple_visits():
    """
    Hypothesis strategy returning a random origin ingested
    into the test archive and having more than one visit.
    """
    tests_data = get_tests_data()

    def _nb_visits(origin_info):
        visits = tests_data['storage'].origin_visit_get(origin_info['url'])
        return len(list(visits))

    candidates = [origin_info for origin_info in tests_data['origins']
                  if _nb_visits(origin_info) > 1]
    return sampled_from(candidates)
def origin_with_release():
    """
    Hypothesis strategy returning a random origin ingested
    into the test archive whose latest snapshot has at least
    one branch targeting a release.

    Origins without any snapshot are skipped, and so are branches
    without target information (None values).
    """
    ret = []
    tests_data = get_tests_data()
    for origin_info in tests_data['origins']:
        snp = tests_data['storage'].snapshot_get_latest(origin_info['url'])
        if snp is None:
            # no snapshot available for that origin, nothing to inspect
            continue
        if any(branch is not None and branch['target_type'] == 'release'
               for branch in snp['branches'].values()):
            ret.append(origin_info)
    return sampled_from(ret)
def unknown_origin_id():
    """
    Hypothesis strategy returning a random origin id not ingested
    into the test archive.

    Ids are integers greater than or equal to 1000000; such values
    are presumed not to be used by the test archive.
    """
    first_unknown_id = 1000000
    return integers(min_value=first_unknown_id)
def new_origin():
    """
    Hypothesis strategy returning a random origin not ingested
    into the test archive.

    Origins come from the swh.model origins strategy, are converted
    to dicts and kept only when the tests data storage returns None
    for them.
    """
    storage = get_tests_data()['storage']

    def _is_not_archived(origin_dict):
        return storage.origin_get([origin_dict])[0] is None

    origin_dicts = new_origin_strategy().map(lambda origin: origin.to_dict())
    return origin_dicts.filter(_is_not_archived)
def new_origins(nb_origins=None):
    """
    Hypothesis strategy returning random origins not ingested
    into the test archive.

    Args:
        nb_origins (int): exact number of origins to generate, a list
            of 2 to 8 origins is generated when it is None

    Returns:
        a strategy generating lists of unique origin dicts
    """
    if nb_origins is not None:
        min_size = max_size = nb_origins
    else:
        min_size, max_size = 2, 8
    # the size bounds are handed over to hypothesis instead of picking a
    # size with the random module when the strategy is built, so that the
    # list size is controlled (and reproducible) by hypothesis
    return lists(new_origin(), min_size=min_size, max_size=max_size,
                 unique_by=lambda o: tuple(sorted(o.items())))
def visit_dates(nb_dates=None):
    """
    Hypothesis strategy returning a list of visit dates.

    Dates are unique, sorted in ascending order and comprised between
    2015-01-01 and 2018-12-31.

    Args:
        nb_dates (int): exact number of dates to generate, a list
            of 2 to 8 dates is generated when it is None
    """
    # compare against None (as done in new_origins) so that an explicit
    # request for 0 dates is not silently turned into the 2..8 default
    if nb_dates is not None:
        min_size = max_size = nb_dates
    else:
        min_size, max_size = 2, 8
    dates = datetimes(min_value=datetime(2015, 1, 1, 0, 0),
                      max_value=datetime(2018, 12, 31, 0, 0))
    return lists(dates, min_size=min_size, max_size=max_size,
                 unique=True).map(sorted)
def release():
    """
    Hypothesis strategy returning a random release ingested
    into the test archive (sampled from the 'releases' tests data).
    """
    known_releases = _known_swh_object('releases')
    return known_releases
def unknown_release():
    """
    Hypothesis strategy returning a random release not ingested
    into the test archive.

    Generated sha1 values are kept only when looking them up as
    release identifiers in the tests data storage yields None.
    """
    def _is_missing(rel_id):
        storage = get_tests_data()['storage']
        # the identifier is converted to bytes before querying the storage,
        # as done for the other lookups by identifier in this module
        return next(storage.release_get([hash_to_bytes(rel_id)])) is None

    return sha1().filter(_is_missing)
def revision():
    """
    Hypothesis strategy returning a random revision ingested
    into the test archive (sampled from the 'revisions' tests data).
    """
    known_revisions = _known_swh_object('revisions')
    return known_revisions
def unknown_revision():
    """
    Hypothesis strategy returning a random revision not ingested
    into the test archive.

    Generated sha1 values are kept only when looking them up as
    revision identifiers in the tests data storage yields None.
    """
    storage = get_tests_data()['storage']

    def _is_missing(rev_id):
        found = storage.revision_get([hash_to_bytes(rev_id)])
        return next(found) is None

    return sha1().filter(_is_missing)
@composite
def new_person(draw):
    """
    Hypothesis strategy returning random raw swh person data.

    The person name is a text of 5 to 30 characters whose codepoints
    are in the range [0, 255]; the email and fullname are derived from
    it. All values of the returned dict are encoded to bytes.
    """
    name_alphabet = characters(min_codepoint=0, max_codepoint=255)
    name = draw(text(min_size=5, max_size=30, alphabet=name_alphabet))
    email = '%s@company.org' % name
    fullname = '%s <%s>' % (name, email)
    person = {
        'name': name.encode(),
        'email': email.encode(),
        'fullname': fullname.encode(),
    }
    return person
@composite
def new_swh_date(draw):
    """
    Hypothesis strategy returning random raw swh date data.

    The timestamp is the integer part of the POSIX timestamp of a
    datetime drawn between 2015-01-01 and 2018-12-31; the offset is
    always 0 and 'negative_utc' is always False.
    """
    earliest = datetime(2015, 1, 1, 0, 0)
    latest = datetime(2018, 12, 31, 0, 0)
    epoch_seconds = datetimes(min_value=earliest, max_value=latest).map(
        lambda d: int(d.timestamp()))
    swh_date = {
        'timestamp': draw(epoch_seconds),
        'offset': 0,
        'negative_utc': False,
    }
    return swh_date
@composite
def new_revision(draw):
    """
    Hypothesis strategy returning random raw swh revision data
    not ingested into the test archive.

    The revision has no parents, an empty metadata list, the 'git'
    type and is flagged as not synthetic.
    """
    # values are drawn in the order of the keys of the returned dict
    rev_id = draw(unknown_revision().map(hash_to_bytes))
    dir_id = draw(sha1().map(hash_to_bytes))
    author = draw(new_person())
    committer = draw(new_person())
    message = draw(
        text(min_size=20, max_size=100).map(lambda t: t.encode()))
    date = draw(new_swh_date())
    committer_date = draw(new_swh_date())
    return {
        'id': rev_id,
        'directory': dir_id,
        'author': author,
        'committer': committer,
        'message': message,
        'date': date,
        'committer_date': committer_date,
        'synthetic': False,
        'type': 'git',
        'parents': [],
        'metadata': [],
    }
def revisions(min_size=2, max_size=8):
    """
    Hypothesis strategy returning random revisions ingested
    into the test archive.

    Args:
        min_size (int): minimal size of the generated lists
        max_size (int): maximal size of the generated lists
    """
    size_bounds = {'min_size': min_size, 'max_size': max_size}
    return lists(revision(), **size_bounds)
def unknown_revisions(min_size=2, max_size=8):
    """
    Hypothesis strategy returning random revisions not ingested
    into the test archive.

    Args:
        min_size (int): minimal size of the generated lists
        max_size (int): maximal size of the generated lists
    """
    size_bounds = {'min_size': min_size, 'max_size': max_size}
    return lists(unknown_revision(), **size_bounds)
def snapshot():
    """
    Hypothesis strategy returning a random snapshot ingested
    into the test archive (sampled from the 'snapshots' tests data).
    """
    known_snapshots = _known_swh_object('snapshots')
    return known_snapshots
def new_snapshots(nb_snapshots=None):
    """
    Hypothesis strategy returning a list of random snapshots, as dicts,
    generated with the swh.model snapshots strategy.

    Args:
        nb_snapshots (int): exact number of snapshots to generate, a list
            of 2 to 8 snapshots is generated when it is None
    """
    # compare against None (as done in new_origins) so that an explicit
    # request for 0 snapshots is not silently turned into the 2..8 default
    if nb_snapshots is not None:
        min_size = max_size = nb_snapshots
    else:
        min_size, max_size = 2, 8
    snapshot_dicts = new_snapshot(
        min_size=2, max_size=10,
        only_objects=True).map(lambda snp: snp.to_dict())
    return lists(snapshot_dicts, min_size=min_size, max_size=max_size)
def unknown_snapshot():
    """
    Hypothesis strategy returning a random snapshot not ingested
    into the test archive.

    Generated sha1 values are kept only when looking them up as
    snapshot identifiers in the tests data storage returns None.
    """
    storage = get_tests_data()['storage']

    def _is_missing(snp_id):
        return storage.snapshot_get(hash_to_bytes(snp_id)) is None

    return sha1().filter(_is_missing)
def person():
    """
    Hypothesis strategy returning a random person ingested
    into the test archive (sampled from the 'persons' tests data).
    """
    known_persons = _known_swh_object('persons')
    return known_persons
def unknown_person():
    """
    Hypothesis strategy returning a random person not ingested
    into the test archive.

    Persons are represented by integers greater than or equal to
    1000000; such values are presumed not to be used by the
    test archive.
    """
    first_unknown_id = 1000000
    return integers(min_value=first_unknown_id)
def _get_origin_dfs_revisions_walker():
    """
    Build a 'dfs' revisions walker starting from the target of the
    HEAD branch found in the latest snapshot of an origin randomly
    picked in the tests data.
    """
    tests_data = get_tests_data()
    storage = tests_data['storage']
    # the last origin of the tests data is never picked
    candidate_origins = tests_data['origins'][:-1]
    picked_origin = random.choice(candidate_origins)
    latest_snapshot = storage.snapshot_get_latest(picked_origin['url'])
    head_branch = latest_snapshot['branches'][b'HEAD']
    return get_revisions_walker('dfs', storage, head_branch['target'])
def ancestor_revisions():
    """
    Hypothesis strategy returning a pair of revisions ingested into the
    test archive with an ancestor relation.

    The returned dict holds the hexadecimal identifiers of the first
    walked revision ('sha1_git_root'), of a randomly picked other
    revision ('sha1_git') and of the children of the latter ('children').
    """
    # dfs revisions walker for one of the origins
    # loaded into the test archive
    walker = _get_origin_dfs_revisions_walker()
    mainline = []
    children_of = defaultdict(list)
    reached_initial_rev = False
    for rev in walker:
        # record the parent -> children relation for every walked revision
        for parent_id in rev['parents']:
            children_of[parent_id].append(rev['id'])
        # revisions are collected up to (and including) the first walked
        # revision without parents
        if not reached_initial_rev:
            mainline.append(rev)
        if not rev['parents']:
            reached_initial_rev = True

    # first walked revision
    head_rev = mainline[0]
    # the first and the last collected revisions are never picked
    # NOTE(review): excluding the last one may be unintended -- confirm
    candidate_indexes = range(1, len(mainline) - 1)
    picked_rev = mainline[random.choice(candidate_indexes)]
    child_ids = children_of[picked_rev['id']]

    return just({
        'sha1_git_root': hash_to_hex(head_rev['id']),
        'sha1_git': hash_to_hex(picked_rev['id']),
        'children': list(map(hash_to_hex, child_ids)),
    })
def non_ancestor_revisions():
    """
    Hypothesis strategy returning a pair of revisions ingested into the
    test archive with no ancestor relation.

    The pair is made of the first two parents of a randomly selected
    merge revision whose parents all have a single child revision.

    Raises:
        ValueError: if no such merge revision is found while walking
            the revisions of the picked origin
    """
    # get a dfs revisions walker for one of the origins
    # loaded into the test archive
    revisions_walker = _get_origin_dfs_revisions_walker()
    merge_revs = []
    children = defaultdict(list)
    # get all merge revisions and record the parent -> children relation
    for rev in revisions_walker:
        if len(rev['parents']) > 1:
            merge_revs.append(rev)
        for rev_p in rev['parents']:
            children[rev_p].append(rev['id'])

    # find a merge revision whose parents have a unique child revision;
    # candidates are shuffled so stopping at the first match is still
    # a random selection
    random.shuffle(merge_revs)
    selected_revs = None
    for merge_rev in merge_revs:
        if all(len(children[rev_p]) == 1
               for rev_p in merge_rev['parents']):
            selected_revs = merge_rev['parents']
            break

    if selected_revs is None:
        # fail with an explicit message instead of an obscure TypeError
        # raised when subscripting None below
        raise ValueError('No merge revision whose parents have a unique '
                         'child revision found in the walked origin')

    return just({
        'sha1_git_root': hash_to_hex(selected_revs[0]),
        'sha1_git': hash_to_hex(selected_revs[1])
    })
# The following strategies return data specific to some tests
# that cannot be generated and are thus hardcoded.
def contents_with_ctags():
    """
    Hypothesis strategy returning contents ingested into the test
    archive. Those contents are ctags compatible, that is running
    ctags on those yields results.

    The returned dict holds the hardcoded sha1 identifiers of the
    contents ('sha1s') and a symbol name ('symbol_name').
    """
    ctags_sha1s = [
        '0ab37c02043ebff946c1937523f60aadd0844351',
        '15554cf7608dde6bfefac7e3d525596343a85b6f',
        '2ce837f1489bdfb8faf3ebcc7e72421b5bea83bd',
        '30acd0b47fc25e159e27a980102ddb1c4bea0b95',
        '4f81f05aaea3efb981f9d90144f746d6b682285b',
        '5153aa4b6e4455a62525bc4de38ed0ff6e7dd682',
        '59d08bafa6a749110dfb65ba43a61963d5a5bf9f',
        '7568285b2d7f31ae483ae71617bd3db873deaa2c',
        '7ed3ee8e94ac52ba983dd7690bdc9ab7618247b4',
        '8ed7ef2e7ff9ed845e10259d08e4145f1b3b5b03',
        '9b3557f1ab4111c8607a4f2ea3c1e53c6992916c',
        '9c20da07ed14dc4fcd3ca2b055af99b2598d8bdd',
        'c20ceebd6ec6f7a19b5c3aebc512a12fbdc9234b',
        'e89e55a12def4cd54d5bff58378a3b5119878eb7',
        'e8c0654fe2d75ecd7e0b01bee8a8fc60a130097e',
        'eb6595e559a1d34a2b41e8d4835e0e4f98a5d2b5',
    ]
    ctags_data = {
        'sha1s': ctags_sha1s,
        'symbol_name': 'ABS',
    }
    return just(ctags_data)
def revision_with_submodules():
    """
    Hypothesis strategy returning a revision that is known to
    point to a directory with revision entries (aka git submodule).

    The returned dict holds hardcoded values: the revision identifier
    ('rev_sha1_git'), its directory identifier ('rev_dir_sha1_git')
    and the path of a revision entry ('rev_dir_rev_path').
    """
    submodule_data = {
        'rev_sha1_git': 'ffcb69001f3f6745dfd5b48f72ab6addb560e234',
        'rev_dir_sha1_git': 'd92a21446387fa28410e5a74379c934298f39ae2',
        'rev_dir_rev_path': 'libtess2',
    }
    return just(submodule_data)
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Fri, Jul 4, 11:51 AM (3 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3389779
Attached To
rDWAPPS Web applications
Event Timeline
Log In to Comment