Changeset View
Changeset View
Standalone View
Standalone View
swh/search/tests/test_search.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from itertools import combinations | |||||
from hypothesis import given, settings, strategies | from hypothesis import given, settings, strategies | ||||
from swh.core.api.classes import stream_results | from swh.core.api.classes import stream_results | ||||
class CommonSearchTest: | class CommonSearchTest: | ||||
def test_origin_url_unique_word_prefix(self): | def test_origin_url_unique_word_prefix(self): | ||||
origin_foobar_baz = {"url": "http://foobar.baz"} | origin_foobar_baz = {"url": "http://foobar.baz"} | ||||
▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines | def test_origin_with_visit_added(self): | ||||
[{**o, "has_visits": True} for o in [origin_foobar_baz]] | [{**o, "has_visits": True} for o in [origin_foobar_baz]] | ||||
) | ) | ||||
self.search.flush() | self.search.flush() | ||||
actual_page = self.search.origin_search(url_pattern="foobar", with_visit=True) | actual_page = self.search.origin_search(url_pattern="foobar", with_visit=True) | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin_foobar_baz] | assert actual_page.results == [origin_foobar_baz] | ||||
def test_origin_no_visit_types_search(self):
    """Check searches against an origin that was indexed without visit types.

    The test asserts that a search filtered on ``visit_types=["git"]``
    yields no result, while a search with ``visit_types=None`` yields the
    indexed origin.
    """
    indexed = [{"url": "http://foobar.baz"}]
    self.search.origin_update(indexed)
    self.search.flush()

    # Search restricted to a visit type the origin was never given.
    filtered_page = self.search.origin_search(
        url_pattern="http", visit_types=["git"]
    )
    assert filtered_page.next_page_token is None
    assert sorted(hit["url"] for hit in filtered_page.results) == []

    # Search without any visit type restriction.
    unfiltered_page = self.search.origin_search(
        url_pattern="http", visit_types=None
    )
    assert unfiltered_page.next_page_token is None
    found_urls = sorted(hit["url"] for hit in unfiltered_page.results)
    assert found_urls == sorted(entry["url"] for entry in indexed)
def test_origin_visit_types_search(self): | |||||
origins = [ | |||||
{"url": "http://foobar.baz", "visit_types": ["git"]}, | |||||
{"url": "http://barbaz.qux", "visit_types": ["svn"]}, | |||||
{"url": "http://qux.quux", "visit_types": ["hg"]}, | |||||
] | |||||
self.search.origin_update(origins) | |||||
self.search.flush() | |||||
for origin in origins: | |||||
actual_page = self.search.origin_search( | |||||
url_pattern="http", visit_types=origin["visit_types"] | |||||
) | |||||
vlorentz: The function name should mention updates (as it's what it's really testing); and there should… | |||||
[inline comment] anlambert (done): ack | |||||
assert actual_page.next_page_token is None | |||||
results = [r["url"] for r in actual_page.results] | |||||
expected_results = [origin["url"]] | |||||
assert sorted(results) == sorted(expected_results) | |||||
actual_page = self.search.origin_search(url_pattern="http", visit_types=None) | |||||
assert actual_page.next_page_token is None | |||||
results = [r["url"] for r in actual_page.results] | |||||
expected_results = [origin["url"] for origin in origins] | |||||
assert sorted(results) == sorted(expected_results) | |||||
def test_origin_visit_types_update_search(self): | |||||
self.search.origin_update([{"url": "http://foobar.baz"}]) | |||||
self.search.flush() | |||||
origins = [ | |||||
{"url": "http://foobar.baz", "visit_types": ["git"]}, | |||||
{"url": "http://foobar.baz", "visit_types": ["hg"]}, | |||||
{"url": "http://foobar.baz", "visit_types": ["svn"]}, | |||||
] | |||||
visit_types = [] | |||||
for origin in origins: | |||||
self.search.origin_update([origin]) | |||||
self.search.flush() | |||||
visit_types += origin["visit_types"] | |||||
for i in range(len(visit_types)): | |||||
for types in combinations(visit_types, r=i + 1): | |||||
[inline comment] vlorentz (unsubmitted, not done): IMO this is clearer | |||||
[inline comment] vlorentz (not done): Could you split this into two functions, e.g. `add_visit_type` and `check_visit_types`? | |||||
actual_page = self.search.origin_search( | |||||
url_pattern="http", visit_types=list(types) | |||||
[inline comment] vlorentz (not done): What does it add to `test_origin_with_multiple_visit_types_search`? | |||||
[inline comment] anlambert (done): It tests combinations of visit types when searching, but I guess I could merge that test code into the one above. | |||||
) | |||||
assert actual_page.next_page_token is None | |||||
results = [r["url"] for r in actual_page.results] | |||||
expected_results = [origin["url"]] | |||||
assert sorted(results) == sorted(expected_results) | |||||
[inline comment] vlorentz (unsubmitted, done): I find this to be harder to read than the code being tested. Could you keep it simpler, by removing all the loops and writing the origin types explicitly every time? | |||||
[inline comment] anlambert (author, unsubmitted, done): Ack, will simplify a bit. | |||||
def test_origin_intrinsic_metadata_description(self): | def test_origin_intrinsic_metadata_description(self): | ||||
origin1_nothin = {"url": "http://origin1"} | origin1_nothin = {"url": "http://origin1"} | ||||
origin2_foobar = {"url": "http://origin2"} | origin2_foobar = {"url": "http://origin2"} | ||||
origin3_barbaz = {"url": "http://origin3"} | origin3_barbaz = {"url": "http://origin3"} | ||||
self.search.origin_update( | self.search.origin_update( | ||||
[ | [ | ||||
{**origin1_nothin, "intrinsic_metadata": {},}, | {**origin1_nothin, "intrinsic_metadata": {},}, | ||||
▲ Show 20 Lines • Show All 198 Lines • ▼ Show 20 Lines | def test_origin_intrinsic_metadata_inconsistent_type(self): | ||||
actual_page = self.search.origin_search(metadata_pattern="baz qux") | actual_page = self.search.origin_search(metadata_pattern="baz qux") | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin3_bazqux] | assert actual_page.results == [origin3_bazqux] | ||||
actual_page = self.search.origin_search(metadata_pattern="foo bar") | actual_page = self.search.origin_search(metadata_pattern="foo bar") | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin1_foobar] | assert actual_page.results == [origin1_foobar] | ||||
def test_origin_intrinsic_metadata_update(self):
    """Check that updated intrinsic metadata is found by a metadata search.

    The origin is first indexed with author "John Doe" and searched with
    the pattern "John"; the author is then changed to "Jane Doe", the
    document is re-submitted, and the pattern "Jane" is searched.
    """
    origin = {"url": "http://origin1"}
    metadata = {
        "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
        "author": "John Doe",
    }
    document = dict(origin, intrinsic_metadata=metadata)

    self.search.origin_update([document])
    self.search.flush()

    first_page = self.search.origin_search(metadata_pattern="John")
    assert first_page.next_page_token is None
    assert first_page.results == [origin]

    # Mutate the same metadata dict in place before re-submitting it.
    metadata["author"] = "Jane Doe"
    self.search.origin_update([document])
    self.search.flush()

    second_page = self.search.origin_search(metadata_pattern="Jane")
    assert second_page.next_page_token is None
    assert second_page.results == [origin]
[inline comment] vlorentz (unsubmitted, not done): Why this new test? | |||||
[inline comment] anlambert (author, unsubmitted, done): As I have modified the way documents get updated in Elasticsearch, I added a new commit with that test to ensure there is no update regression for metadata. | |||||
# TODO: add more tests with more codemeta terms | # TODO: add more tests with more codemeta terms | ||||
# TODO: add more tests with edge cases | # TODO: add more tests with edge cases | ||||
@settings(deadline=None) | @settings(deadline=None) | ||||
@given(strategies.integers(min_value=1, max_value=4)) | @given(strategies.integers(min_value=1, max_value=4)) | ||||
def test_origin_url_paging(self, limit): | def test_origin_url_paging(self, limit): | ||||
# TODO: no hypothesis | # TODO: no hypothesis | ||||
▲ Show 20 Lines • Show All 81 Lines • Show Last 20 Lines |
[inline comment, full text] vlorentz: The function name should mention updates (as that is what it is really testing), and there should be a separate test for updating the origins in a separate step, i.e. `self.search.origin_update([{"url": "http://foobar.baz", "visit_types": ["git", "svn"]}])` followed by `self.search.origin_update([{"url": "http://foobar.baz", "visit_types": ["hg"]}])`.