Changeset View
Standalone View
swh/search/tests/test_search.py
Show First 20 Lines • Show All 443 Lines • ▼ Show 20 Lines | def test_origin_sort_by_search(self): | ||||
_check_results(["-nb_visits"], ORIGINS[::-1]) | _check_results(["-nb_visits"], ORIGINS[::-1]) | ||||
_check_results(["last_visit_date"], ORIGINS) | _check_results(["last_visit_date"], ORIGINS) | ||||
_check_results(["-last_visit_date"], ORIGINS[::-1]) | _check_results(["-last_visit_date"], ORIGINS[::-1]) | ||||
_check_results(["nb_visits", "-last_visit_date"], ORIGINS) | _check_results(["nb_visits", "-last_visit_date"], ORIGINS) | ||||
_check_results(["-last_visit_date", "nb_visits"], ORIGINS[::-1]) | _check_results(["-last_visit_date", "nb_visits"], ORIGINS[::-1]) | ||||
def test_origin_instrinsic_metadata_license_search(self):
    """Check that ``origin_search`` filters origins by ``licenses``.

    Two origins are indexed: one whose ``license`` is an SPDX URL and one
    whose ``license`` is a bare SPDX identifier. The search is then run
    with one keyword per origin, and finally with two keywords at once,
    which is expected to return both origins.
    """
    # Review thread on the fixture below (status: not done):
    #   vlorentz: `license` should be a URI.
    #   KShivendu: Okay, I'll change that. But I "saved"
    #     https://pypi.org/project/Django/ in my self-hosted swh instance and
    #     its license field is "BSD-3-Clause", which isn't a URI, while
    #     https://github.com/Edyta2801/Ex_2_Webpage has license
    #     "https://spdx.org/licenses/ISC". So license isn't always a URI.
    #   vlorentz: Then it's a bug in the indexer; see
    #     https://www.w3.org/TR/json-ld/#node-objects (the quote that followed
    #     is truncated in the review view).
    ORIGINS = [
        {
            "url": "http://foobar.1.com",
            "intrinsic_metadata": {
                "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                "description": "foo bar",
                "license": "https://spdx.org/licenses/MIT",
            },
        },
        {
            "url": "http://foobar.2.com",
            "intrinsic_metadata": {
                "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                "description": "foo bar",
                "license": "BSD-3-Clause",
            },
        },
    ]

    self.search.origin_update(ORIGINS)
    self.search.flush()

    def _urls_for(licenses):
        # Run one license-filtered search and keep only the origin URLs.
        page = self.search.origin_search(url_pattern="foobar", licenses=licenses)
        return [r["url"] for r in page.results]

    # One keyword per origin: each query must return exactly that origin.
    assert _urls_for(["MIT"]) == [ORIGINS[0]["url"]]
    assert _urls_for(["bsd"]) == [ORIGINS[1]["url"]]

    # Review thread on the multi-keyword query (status: done):
    #   KShivendu: This test passes for Elasticsearch if I use
    #     license=["mit"], license=["bsd"] and license=["mit", "clause"].
    #     Because of this I'll have to apply the analyzer which is used for
    #     the license field (which is of type text). One way to apply the
    #     analyzer on the query is only available for "match" (corrected
    #     from "terms"). Any suggestions?
    #   KShivendu: I fixed this with "should" and "match". You may ignore
    #     this thread.
    # Review thread on the final assertion (status: not done):
    #   vlorentz: This test only checks that either `programming_language`
    #     or `license` filtering works, because it would still succeed if
    #     either didn't filter anything. You should write separate tests
    #     for each.
    # Two keywords, each matching a different origin: both are expected back.
    # Result order is not asserted, hence the sort on both sides.
    results = _urls_for(["mit", "3-Clause"])
    assert sorted(results) == sorted([o["url"] for o in ORIGINS])
def test_origin_instrinsic_metadata_programming_language_search(self):
    """Check that ``origin_search`` filters by ``programming_languages``.

    Two origins are indexed, each with a different ``programmingLanguage``
    in its intrinsic metadata. The search is run once per language, and
    then with both languages at once, which is expected to return both
    origins.
    """
    ORIGINS = [
        {
            "url": "http://foobar.1.com",
            "intrinsic_metadata": {
                "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                "description": "foo bar",
                "programmingLanguage": "python",
            },
        },
        {
            "url": "http://foobar.2.com",
            "intrinsic_metadata": {
                "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                "description": "foo bar",
                "programmingLanguage": "javascript",
            },
        },
    ]

    self.search.origin_update(ORIGINS)
    self.search.flush()

    def _urls_for(languages):
        # Run one language-filtered search and keep only the origin URLs.
        page = self.search.origin_search(
            url_pattern="foobar", programming_languages=languages
        )
        return [r["url"] for r in page.results]

    # One language per query: each must return exactly the matching origin.
    assert _urls_for(["python"]) == [ORIGINS[0]["url"]]
    assert _urls_for(["javascript"]) == [ORIGINS[1]["url"]]

    # Both languages at once: both origins are expected back. Result order
    # is not asserted, hence the sort on both sides.
    results = _urls_for(["python", "javascript"])
    assert sorted(results) == sorted([o["url"] for o in ORIGINS])
def test_origin_update_with_no_visit_types(self): | def test_origin_update_with_no_visit_types(self): | ||||
""" | """ | ||||
Update an origin with visit types first then with no visit types, | Update an origin with visit types first then with no visit types, | ||||
check origin can still be searched with visit types afterwards. | check origin can still be searched with visit types afterwards. | ||||
""" | """ | ||||
origin_url = "http://foobar.baz" | origin_url = "http://foobar.baz" | ||||
self.search.origin_update([{"url": origin_url, "visit_types": ["git"]}]) | self.search.origin_update([{"url": origin_url, "visit_types": ["git"]}]) | ||||
self.search.flush() | self.search.flush() | ||||
Show All 31 Lines | def test_origin_intrinsic_metadata_description(self): | ||||
}, | }, | ||||
] | ] | ||||
) | ) | ||||
self.search.flush() | self.search.flush() | ||||
actual_page = self.search.origin_search(metadata_pattern="foo") | actual_page = self.search.origin_search(metadata_pattern="foo") | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin2_foobar] | assert actual_page.results == [origin2_foobar] | ||||
Inline comment (status: done) — KShivendu: I should move this to `test_in_memory.py`.
actual_page = self.search.origin_search(metadata_pattern="foo bar") | actual_page = self.search.origin_search(metadata_pattern="foo bar") | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin2_foobar] | assert actual_page.results == [origin2_foobar] | ||||
actual_page = self.search.origin_search(metadata_pattern="bar baz") | actual_page = self.search.origin_search(metadata_pattern="bar baz") | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin3_barbaz] | assert actual_page.results == [origin3_barbaz] | ||||
▲ Show 20 Lines • Show All 388 Lines • Show Last 20 Lines |
`license` should be a URI