Changeset View
Changeset View
Standalone View
Standalone View
swh/search/tests/test_search.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from itertools import combinations | |||||
from hypothesis import given, settings, strategies | from hypothesis import given, settings, strategies | ||||
from swh.core.api.classes import stream_results | from swh.core.api.classes import stream_results | ||||
class CommonSearchTest: | class CommonSearchTest: | ||||
def test_origin_url_unique_word_prefix(self): | def test_origin_url_unique_word_prefix(self): | ||||
origin_foobar_baz = {"url": "http://foobar.baz"} | origin_foobar_baz = {"url": "http://foobar.baz"} | ||||
▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines | def test_origin_with_visit_added(self): | ||||
[{**o, "has_visits": True} for o in [origin_foobar_baz]] | [{**o, "has_visits": True} for o in [origin_foobar_baz]] | ||||
) | ) | ||||
self.search.flush() | self.search.flush() | ||||
actual_page = self.search.origin_search(url_pattern="foobar", with_visit=True) | actual_page = self.search.origin_search(url_pattern="foobar", with_visit=True) | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin_foobar_baz] | assert actual_page.results == [origin_foobar_baz] | ||||
def test_origin_no_visit_types_search(self):
    """An origin indexed without any visit type information must not be
    matched by a visit-type filter, but must still be found when no
    filter is given."""
    origins = [{"url": "http://foobar.baz"}]
    self.search.origin_update(origins)
    self.search.flush()

    # Filtering on a visit type matches nothing: the origin has none recorded.
    page = self.search.origin_search(url_pattern="http", visit_types=["git"])
    assert page.next_page_token is None
    assert sorted(result["url"] for result in page.results) == []

    # Without a visit-type filter the origin is returned as usual.
    page = self.search.origin_search(url_pattern="http", visit_types=None)
    assert page.next_page_token is None
    found = sorted(result["url"] for result in page.results)
    assert found == sorted(origin["url"] for origin in origins)
def test_origin_visit_types_search(self):
    """Each origin must be matched by a search filtered on its own visit
    type, and all origins must be returned when no filter is given."""
    origins = [
        {"url": "http://foobar.baz", "visit_types": ["git"]},
        {"url": "http://barbaz.qux", "visit_types": ["svn"]},
        {"url": "http://qux.quux", "visit_types": ["hg"]},
    ]
    self.search.origin_update(origins)
    self.search.flush()

    # Filtering on a given visit type returns exactly the one origin
    # indexed with it.
    for origin in origins:
        page = self.search.origin_search(
            url_pattern="http", visit_types=origin["visit_types"]
        )
        assert page.next_page_token is None
        found = sorted(result["url"] for result in page.results)
        assert found == sorted([origin["url"]])

    # No visit-type filter: every origin matches.
    page = self.search.origin_search(url_pattern="http", visit_types=None)
    assert page.next_page_token is None
    found = sorted(result["url"] for result in page.results)
    assert found == sorted(origin["url"] for origin in origins)
def test_origin_visit_types_update_search(self):
    """Successive updates of the same origin must accumulate its visit
    types: after each update, every non-empty combination of the types
    seen so far must match the origin."""
    origins = [
        {"url": "http://foobar.baz", "visit_types": ["git"]},
        {"url": "http://foobar.baz", "visit_types": ["hg"]},
        {"url": "http://foobar.baz", "visit_types": ["svn"]},
    ]
    seen_types = []
    for origin in origins:
        self.search.origin_update([origin])
        self.search.flush()
        seen_types.extend(origin["visit_types"])

        # Any non-empty subset of the accumulated visit types must
        # still match the (single) origin.
        for size in range(1, len(seen_types) + 1):
            for subset in combinations(seen_types, r=size):
                page = self.search.origin_search(
                    url_pattern="http", visit_types=list(subset)
                )
                assert page.next_page_token is None
                found = sorted(result["url"] for result in page.results)
                assert found == sorted([origin["url"]])
def test_origin_intrinsic_metadata_description(self): | def test_origin_intrinsic_metadata_description(self): | ||||
origin1_nothin = {"url": "http://origin1"} | origin1_nothin = {"url": "http://origin1"} | ||||
Done Inline ActionsI find this to be harder to read than the code being tested. Could you keep it simpler, by removing all the loops and writing the origin types explicitly every time? vlorentz: I find this to be harder to read than the code being tested.
Could you keep it simpler, by… | |||||
Done Inline ActionsAck, will simplify a bit anlambert: Ack, will simplify a bit | |||||
origin2_foobar = {"url": "http://origin2"} | origin2_foobar = {"url": "http://origin2"} | ||||
origin3_barbaz = {"url": "http://origin3"} | origin3_barbaz = {"url": "http://origin3"} | ||||
self.search.origin_update( | self.search.origin_update( | ||||
[ | [ | ||||
{**origin1_nothin, "intrinsic_metadata": {},}, | {**origin1_nothin, "intrinsic_metadata": {},}, | ||||
{ | { | ||||
**origin2_foobar, | **origin2_foobar, | ||||
▲ Show 20 Lines • Show All 195 Lines • ▼ Show 20 Lines | def test_origin_intrinsic_metadata_inconsistent_type(self): | ||||
actual_page = self.search.origin_search(metadata_pattern="baz qux") | actual_page = self.search.origin_search(metadata_pattern="baz qux") | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin3_bazqux] | assert actual_page.results == [origin3_bazqux] | ||||
actual_page = self.search.origin_search(metadata_pattern="foo bar") | actual_page = self.search.origin_search(metadata_pattern="foo bar") | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin1_foobar] | assert actual_page.results == [origin1_foobar] | ||||
# TODO: add more tests with more codemeta terms | # TODO: add more tests with more codemeta terms | ||||
# TODO: add more tests with edge cases | # TODO: add more tests with edge cases | ||||
@settings(deadline=None) | @settings(deadline=None) | ||||
@given(strategies.integers(min_value=1, max_value=4)) | @given(strategies.integers(min_value=1, max_value=4)) | ||||
def test_origin_url_paging(self, limit): | def test_origin_url_paging(self, limit): | ||||
# TODO: no hypothesis | # TODO: no hypothesis | ||||
origin1_foo = {"url": "http://origin1/foo"} | origin1_foo = {"url": "http://origin1/foo"} | ||||
origin2_foobar = {"url": "http://origin2/foo/bar"} | origin2_foobar = {"url": "http://origin2/foo/bar"} | ||||
origin3_foobarbaz = {"url": "http://origin3/foo/bar/baz"} | origin3_foobarbaz = {"url": "http://origin3/foo/bar/baz"} | ||||
self.reset() | self.reset() | ||||
self.search.origin_update([origin1_foo, origin2_foobar, origin3_foobarbaz]) | self.search.origin_update([origin1_foo, origin2_foobar, origin3_foobarbaz]) | ||||
self.search.flush() | self.search.flush() | ||||
results = stream_results( | results = stream_results( | ||||
self.search.origin_search, url_pattern="foo bar baz", limit=limit | self.search.origin_search, url_pattern="foo bar baz", limit=limit | ||||
) | ) | ||||
results = [res["url"] for res in results] | results = [res["url"] for res in results] | ||||
expected_results = [o["url"] for o in [origin3_foobarbaz]] | expected_results = [o["url"] for o in [origin3_foobarbaz]] | ||||
assert sorted(results[0 : len(expected_results)]) == sorted(expected_results) | assert sorted(results[0 : len(expected_results)]) == sorted(expected_results) | ||||
results = stream_results( | results = stream_results( | ||||
Not Done Inline Actionswhy this new test? vlorentz: why this new test? | |||||
Done Inline ActionsAs I have modified the way document gets updated in elasticsearch, I added a new commit with that test to ensure no update regression for metadata. anlambert: As I have modified the way document gets updated in elasticsearch, I added a new commit with… | |||||
self.search.origin_search, url_pattern="foo bar", limit=limit | self.search.origin_search, url_pattern="foo bar", limit=limit | ||||
) | ) | ||||
results = [res["url"] for res in results] | results = [res["url"] for res in results] | ||||
expected_results = [o["url"] for o in [origin2_foobar, origin3_foobarbaz]] | expected_results = [o["url"] for o in [origin2_foobar, origin3_foobarbaz]] | ||||
assert sorted(results[0 : len(expected_results)]) == sorted(expected_results) | assert sorted(results[0 : len(expected_results)]) == sorted(expected_results) | ||||
results = stream_results( | results = stream_results( | ||||
self.search.origin_search, url_pattern="foo", limit=limit | self.search.origin_search, url_pattern="foo", limit=limit | ||||
▲ Show 20 Lines • Show All 57 Lines • Show Last 20 Lines |
The function name should mention updates (as it's what it's really testing); and there should be a separate test for updating the origins in a separate step (ie. self.search.origin_update([{"url": "http://foobar.baz", "visit_types": ["git", "svn"]}]) + self.search.origin_update([{"url": "http://foobar.baz", "visit_types": ["hg"]}]))